upstream/mercurial-mirror Commit - r46576:a66568f2

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import collections

10

import collections

11

import os

11

import os

12

13

from .i18n import _

13

from .i18n import _

14

15

16

from . import (

16

from . import (

17

match as matchmod,

17

match as matchmod,

18

node,

18

node,

19

pathutil,

19

pathutil,

20

policy,

20

pycompat,

21

pycompat,

21

util,

22

util,

22

)

23

)

23

24

25

from .utils import stringutil

26

from .utils import stringutil

26

27

from .revlogutils import flagutil

28

from .revlogutils import flagutil

28

29

30

# Optional Rust implementation of copy tracing.  Callers in this module test
# `rustmod is not None` before using it.
# NOTE(review): presumably importrust() yields None when the Rust extension
# is unavailable -- confirm against the policy module.
rustmod = policy.importrust("copy_tracing")

31

29

32

30

def _filter(src, dst, t):
    """filters out invalid copies after chaining

    `t` is a {destination: source} mapping (as produced by _chain()); it is
    modified in place and nothing is returned.  `src` and `dst` are only used
    for membership tests (`path in src` / `path in dst`).
    """

    # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
    # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
    # in the following table (not including trivial cases). For example, case 2
    # is where a file existed in 'src' and remained under that name in 'mid' and
    # then was renamed between 'mid' and 'dst'.
    #
    # case src mid dst result
    #   1   x   y   -    -
    #   2   x   y   y   x->y
    #   3   x   y   x    -
    #   4   x   y   z   x->z
    #   5   -   x   y    -
    #   6   x   x   y   x->y
    #
    # _chain() takes care of chaining the copies in 'a' and 'b', but it
    # cannot tell the difference between cases 1 and 2, between 3 and 4, or
    # between 5 and 6, so it includes all cases in its result.
    # Cases 1, 3, and 5 are then removed by _filter().

    # Iterate over a snapshot because entries are deleted while looping.
    for k, v in list(t.items()):
        # remove copies from files that didn't exist
        if v not in src:
            del t[k]
        # remove criss-crossed copies
        elif k in src and v in dst:
            del t[k]
        # remove copies to files that were then removed
        elif k not in dst:
            del t[k]

62

65

63

66

64

def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'

    Both arguments are {destination: source} mappings.  The result is a new
    dict: a copy of `prefix` where every entry of `suffix` is added with its
    source resolved through `prefix` when possible.  Neither input is
    modified.
    """
    result = prefix.copy()
    for key, value in suffix.items():
        # If the suffix source was itself a copy destination in prefix,
        # follow it back to the original source.
        result[key] = prefix.get(value, value)
    return result

70

73

71

74

72

def _tracefile(fctx, am, basemf):
    """return the path of the ancestor of fctx present in ancestor manifest am

    The ancestors of `fctx` are walked in the order `fctx.ancestors()` yields
    them.  The path of the first one whose file node matches the entry stored
    under that path in `am` (or in `basemf`, when it is provided and
    non-empty) is returned.  None is returned when no ancestor matches.

    Note: we used to try and stop after a given limit, however checking if that
    limit is reached turned out to be very expensive. we are better off
    disabling that feature."""

    for f in fctx.ancestors():
        path = f.path()
        if am.get(path, None) == f.filenode():
            return path
        if basemf and basemf.get(path, None) == f.filenode():
            return path
    return None

86

89

87

90

88

def _dirstatecopies(repo, match=None):
    """return the copies recorded in the dirstate, filtered

    A copy of `repo.dirstate.copies()` is returned in which an entry is kept
    only if the dirstate state of its key is one of b'a', b'n' or b'm' and,
    when `match` is given, `match(key)` is true.
    """
    ds = repo.dirstate
    # Work on a copy so the mapping held by the dirstate is left untouched.
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in b'anm' or (match and not match(k)):
            del c[k]
    return c

95

98

96

99

97

def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.

    This is its own function so extensions can easily wrap this call to see
    what files _forwardcopies is about to process.

    The result is whatever `b.manifest().filesnotin(a.manifest(), match=match)`
    returns.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)

105

108

106

109

107

def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms

    True when `repo.filecopiesmode` is b'changeset-sidedata', or when the
    `experimental.copies.read-from` config value is b'changeset-only' or
    b'compatibility'.
    """
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource

114

117

115

118

116

def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {destination: source} dict for the files present in `b` but
    missing from `a` whose history can be traced back (see _tracefile()) to
    an entry of `a`'s manifest, or of `base`'s manifest when `base` is not
    None.  When the changeset-centric algorithm is enabled the work is
    delegated to _changesetforwardcopies().
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # Shared by every file context traced below.
    ancestrycontext = repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            # `start` is only read in the matching `if debug:` branch below.
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm

173

176

174

177

175

def _revinfo_getter(repo):
    """returns a function that returns the following data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object, or None when the revision is not
      flagged as carrying copy information
    """
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    # A small cache to avoid doing the work twice for merges
    #
    # In the vast majority of cases, if we ask information for a revision
    # about one parent, we'll later ask it for the other. So it makes sense
    # to keep the information around when reaching the first parent of a
    # merge and to drop it after it was provided for the second parent.
    #
    # There are cases where only one parent of the merge will be walked. It
    # happens when the "destination" of the copy tracing is descendant from a
    # new root, not common with the "source". In that case, we will only walk
    # through merge parents that are descendant of changesets common
    # between "source" and "destination".
    #
    # With the current implementation, if such changesets have copy
    # information, we'll keep them in memory until the end of
    # _changesetforwardcopies. We don't expect the case to be frequent
    # enough to matter.
    #
    # In addition, it would be possible to reach a pathological case, where
    # many first parents are met before any second parent is reached. In
    # that case the cache could grow. If this ever becomes an issue one can
    # safely introduce a maximum cache size. This would trade extra CPU/IO
    # time to save memory.
    merge_caches = {}

    def revinfo(rev):
        # A cached entry is handed out once and dropped immediately.
        e = merge_caches.pop(rev, None)
        if e is not None:
            return e
        p1, p2 = parents(rev)
        changes = None
        if flags(rev) & HASCOPIESINFO:
            changes = changelogrevision(rev).changes
        value = (p1, p2, changes)
        if p1 != node.nullrev and p2 != node.nullrev:
            # XXX some case we over cache, IGNORE
            merge_caches[rev] = value
        return value

    return revinfo

231

234

232

235

233

def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor

    The returned callable takes `(anc, desc)`.  It answers False without
    calling `is_ancestor` when `anc > desc`, True when both are equal, and
    otherwise calls `is_ancestor(anc, desc)` and remembers the answer per
    `(anc, desc)` pair.
    """
    cache = {}

    def _is_ancestor(anc, desc):
        if anc > desc:
            return False
        elif anc == desc:
            return True
        key = (anc, desc)
        ret = cache.get(key)
        if ret is None:
            ret = cache[key] = is_ancestor(anc, desc)
        return ret

    return _is_ancestor

249

252

250

253

251

def _changesetforwardcopies(a, b, match):
    """compute the copies from `a` to `b` using changeset-stored copy data

    Returns a {destination: source} dict.  An empty dict is returned when `a`
    is the null revision or the same revision as `b`, or when none of the
    revisions missing from `a` has a parent outside of that missing set.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    # {parent revision: [child revisions]} for the revisions walked below.
    children = {}

    cl = repo.changelog
    isancestor = cached_is_ancestor(cl.isancestorrev)
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    # Parents of missing revisions that are not missing themselves.
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            children.setdefault(p, []).append(r)
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    # The target revision is the destination, not a revision to iterate on.
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)

    if repo.filecopiesmode == b'changeset-sidedata':
        revinfo = _revinfo_getter(repo)
        return _combine_changeset_copies(
            revs, children, b.rev(), revinfo, match, isancestor
        )
    else:
        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )

298

301

299

302

300

def _combine_changeset_copies(
    revs, children, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that return (p1, p2, changes); `changes` is
                  either None or an object exposing `copied_from_p1`,
                  `copied_from_p2` and `removed`
    match: a matcher
    isancestor: a callable forwarded to _merge_copies_dict()

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    # The Rust implementation takes no matcher, so it is only used when
    # every file matches.
    if rustmod is not None and alwaysmatch:
        return rustmod.combine_changeset_copies(
            list(revs), children, targetrev, revinfo, isancestor
        )

    # {revision: {destination: (revision that set the entry, source or None)}}
    all_copies = {}
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for c in children[r]:
            p1, p2, changes = revinfo(c)
            childcopies = {}
            if r == p1:
                parent = 1
                if changes is not None:
                    childcopies = changes.copied_from_p1
            else:
                assert r == p2
                parent = 2
                if changes is not None:
                    childcopies = changes.copied_from_p2
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    # Chain with a copy already known for the source.
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            if changes is not None:
                for f in changes.removed:
                    if f in newcopies:
                        if newcopies is copies:
                            # copy on write to avoid affecting potential other
                            # branches. when there are no other branches, this
                            # could be avoided.
                            newcopies = copies.copy()
                        # A None source records that the copy was removed.
                        newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    # othercopies is updated in place and is already stored.
                    _merge_copies_dict(
                        othercopies, newcopies, isancestor, changes
                    )
                else:
                    _merge_copies_dict(
                        newcopies, othercopies, isancestor, changes
                    )
                    all_copies[c] = newcopies

    final_copies = {}
    for dest, (_tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies

380

390

381

391

382

def _merge_copies_dict(minor, major, isancestor, changes):
    """merge two copies-mapping together, minor and major

    In case of conflict, value from "major" will be picked.

    `minor` is updated in place; nothing is returned.  Both mappings hold
    2-tuples as values: the first item is handed to `isancestor`, the second
    is a source that may be None.

    - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
                                        ancestors of `high_rev`,

    - `changes`: None, or an object whose `salvaged` and `merged` attributes
                 support membership tests on destination paths.
    """
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            minor[dest] = value
        else:
            new_tt = value[0]
            other_tt = other[0]
            if value[1] == other[1]:
                # Both sides agree on the source: keep the minor entry.
                continue
            # content from "major" wins, unless it is older
            # than the branch point or there is a merge
            if new_tt == other_tt:
                minor[dest] = value
            elif (
                changes is not None
                and value[1] is None
                and dest in changes.salvaged
            ):
                # major has no source and the file was salvaged: keep minor
                pass
            elif (
                changes is not None
                and other[1] is None
                and dest in changes.salvaged
            ):
                minor[dest] = value
            elif changes is not None and dest in changes.merged:
                minor[dest] = value
            elif not isancestor(new_tt, other_tt):
                if value[1] is not None:
                    minor[dest] = value
                elif isancestor(other_tt, new_tt):
                    minor[dest] = value

425

435

426

436

427

def _revinfo_getter_extra(repo):
    """return a function that return multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            fparents = fctx._filelog.parents(fctx._filenode)
            # A file counts as merged when its file node has two
            # non-null parents.
            nb_parents = sum(1 for n in fparents if n != node.nullid)
            return nb_parents >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo

464

474

465

475

466

def _combine_changeset_copies_extra(

476

def _combine_changeset_copies_extra(

467

revs, children, targetrev, revinfo, match, isancestor

477

revs, children, targetrev, revinfo, match, isancestor

468

):

478

):

469

"""version of `_combine_changeset_copies` that works with the Google

479

"""version of `_combine_changeset_copies` that works with the Google

470

specific "extra" based storage for copy information"""

480

specific "extra" based storage for copy information"""

471

all_copies = {}

481

all_copies = {}

472

alwaysmatch = match.always()

482

alwaysmatch = match.always()

473

for r in revs:

483

for r in revs:

474

copies = all_copies.pop(r, None)

484

copies = all_copies.pop(r, None)

475

if copies is None:

485

if copies is None:

476

# this is a root

486

# this is a root

477

copies = {}

487

copies = {}

478

for i, c in enumerate(children[r]):

488

for i, c in enumerate(children[r]):

479

p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)

489

p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)

480

if r == p1:

490

if r == p1:

481

parent = 1

491

parent = 1

482

childcopies = p1copies

492

childcopies = p1copies

483

else:

493

else:

484

assert r == p2

494

assert r == p2

485

parent = 2

495

parent = 2

486

childcopies = p2copies

496

childcopies = p2copies

487

if not alwaysmatch:

497

if not alwaysmatch:

488

childcopies = {

498

childcopies = {

489

dst: src for dst, src in childcopies.items() if match(dst)

499

dst: src for dst, src in childcopies.items() if match(dst)

490

}

500

}

491

newcopies = copies

501

newcopies = copies

492

if childcopies:

502

if childcopies:

493

newcopies = copies.copy()

503

newcopies = copies.copy()

494

for dest, source in pycompat.iteritems(childcopies):

504

for dest, source in pycompat.iteritems(childcopies):

495

prev = copies.get(source)

505

prev = copies.get(source)

496

if prev is not None and prev[1] is not None:

506

if prev is not None and prev[1] is not None:

497

source = prev[1]

507

source = prev[1]

498

newcopies[dest] = (c, source)

508

newcopies[dest] = (c, source)

499

assert newcopies is not copies

509

assert newcopies is not copies

500

for f in removed:

510

for f in removed:

501

if f in newcopies:

511

if f in newcopies:

502

if newcopies is copies:

512

if newcopies is copies:

503

# copy on write to avoid affecting potential other

513

# copy on write to avoid affecting potential other

504

# branches. when there are no other branches, this

514

# branches. when there are no other branches, this

505

# could be avoided.

515

# could be avoided.

506

newcopies = copies.copy()

516

newcopies = copies.copy()

507

newcopies[f] = (c, None)

517

newcopies[f] = (c, None)

508

othercopies = all_copies.get(c)

518

othercopies = all_copies.get(c)

509

if othercopies is None:

519

if othercopies is None:

510

all_copies[c] = newcopies

520

all_copies[c] = newcopies

511

else:

521

else:

512

# we are the second parent to work on c, we need to merge our

522

# we are the second parent to work on c, we need to merge our

513

# work with the other.

523

# work with the other.

514

#

524

#

515

# In case of conflict, parent 1 take precedence over parent 2.

525

# In case of conflict, parent 1 take precedence over parent 2.

516

# This is an arbitrary choice made anew when implementing

526

# This is an arbitrary choice made anew when implementing

517

# changeset based copies. It was made without regards with

527

# changeset based copies. It was made without regards with

518

# potential filelog related behavior.

528

# potential filelog related behavior.

519

if parent == 1:

529

if parent == 1:

520

_merge_copies_dict_extra(

530

_merge_copies_dict_extra(

521

othercopies, newcopies, isancestor, ismerged

531

othercopies, newcopies, isancestor, ismerged

522

)

532

)

523

else:

533

else:

524

_merge_copies_dict_extra(

534

_merge_copies_dict_extra(

525

newcopies, othercopies, isancestor, ismerged

535

newcopies, othercopies, isancestor, ismerged

526

)

536

)

527

all_copies[c] = newcopies

537

all_copies[c] = newcopies

528

538

529

final_copies = {}

539

final_copies = {}

530

for dest, (tt, source) in all_copies[targetrev].items():

540

for dest, (tt, source) in all_copies[targetrev].items():

531

if source is not None:

541

if source is not None:

532

final_copies[dest] = source

542

final_copies[dest] = source

533

return final_copies

543

return final_copies

534

544

535

545

536

def _merge_copies_dict_extra(minor, major, isancestor, ismerged):

546

def _merge_copies_dict_extra(minor, major, isancestor, ismerged):

537

"""version of `_merge_copies_dict` that works with the Google

547

"""version of `_merge_copies_dict` that works with the Google

538

specific "extra" based storage for copy information"""

548

specific "extra" based storage for copy information"""

539

for dest, value in major.items():

549

for dest, value in major.items():

540

other = minor.get(dest)

550

other = minor.get(dest)

541

if other is None:

551

if other is None:

542

minor[dest] = value

552

minor[dest] = value

543

else:

553

else:

544

new_tt = value[0]

554

new_tt = value[0]

545

other_tt = other[0]

555

other_tt = other[0]

546

if value[1] == other[1]:

556

if value[1] == other[1]:

547

continue

557

continue

548

# content from "major" wins, unless it is older

558

# content from "major" wins, unless it is older

549

# than the branch point or there is a merge

559

# than the branch point or there is a merge

550

if (

560

if (

551

new_tt == other_tt

561

new_tt == other_tt

552

or not isancestor(new_tt, other_tt)

562

or not isancestor(new_tt, other_tt)

553

or ismerged(dest)

563

or ismerged(dest)

554

):

564

):

555

minor[dest] = value

565

minor[dest] = value

556

566

557

567

558

def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b

    `base` defaults to `a`. `match` is passed through the repository's
    narrowmatch() before use.
    """
    if base is None:
        base = a
    match = a.repo().narrowmatch(match)

    if b.rev() is not None:
        # b is a committed revision
        return _committedforwardcopies(a, b, base, match)

    # b is the working copy: trace up to its first parent, then combine
    # the result with the copies recorded in the dirstate
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))

572

582

573

583

574

def _backwardrenames(a, b, match):
    """find {src@b-side name: dst@a-side name} renames by reversing the
    forward copies from b to a

    Returns an empty dict when `experimental.copytrace` is set to ``off``.
    Entries whose source is still present in `a` are copies, not renames,
    and are left out. A falsy `match` disables filtering.
    """
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    renames = {}
    for dst, src in sorted(pycompat.iteritems(forward)):
        wanted = not match or match(src)
        # a source still present in `a` was copied, not renamed
        if wanted and src not in a:
            renames[src] = dst
    return renames

594

604

595

605

596

def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    Depending on how `x` and `y` relate through ``y.ancestor(x)``, the
    search runs forward, backward, or as a backward+forward combination
    through the ancestor. The selected mode is reported through
    ``ui.debug`` when both the debug flag and `devel.debug.copies` are set.
    """
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

    def trace(message):
        # only used with constant messages, so nothing is formatted eagerly
        if debug:
            repo.ui.debug(message)

    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        trace(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)

    a = y.ancestor(x)
    if a == x:
        trace(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        trace(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        trace(b'debug.copies: search mode: combined\n')
        base = x if a.rev() != node.nullrev else None
        backward = _backwardrenames(x, a, match=match)
        forward = _forwardcopies(a, y, base, match=match)
        copies = _chain(backward, forward)
    _filter(x, y, copies)
    return copies

632

642

633

643

634

def mergecopies(repo, c1, c2, base):
    """
    Find the moves and copies between contexts c1 and c2 that matter for
    a merge, using 'base' as the merge base.

    Copytracing is used by commands such as rebase, merge and unshelve to
    merge files that were moved/copied in one merge parent and modified in
    the other. For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    When rebasing revision 3 onto revision 4 there is no a.txt in
    revision 4; with copytrace disabled the user gets the message:

    ```other changed <file> which local deleted```

    Returns a 3-tuple ``(branch_copies, branch_copies, diverge)``:

    the first two items are `branch_copies` instances;

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    The copytracing algorithm used depends on the configuration.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        dirstate_side = branch_copies(_dirstatecopies(repo, narrowmatch))
        return dirstate_side, branch_copies(), {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    # stringutil.parsebool() returns None when it is unable to parse the
    # value, so only an explicit "false" value turns copytracing off here
    if stringutil.parsebool(copytracing) is False:
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    #
    # In heuristics mode, still do full copytracing if only non-public
    # revisions are involved as that will be fast enough and will also cover
    # the copies which could be missed by heuristics.
    if copytracing != b'heuristics' or _isfullcopytraceable(repo, c1, base):
        return _fullcopytracing(repo, c1, c2, base)
    return _heuristicscopytracing(repo, c1, c2, base)

702

712

703

713

704

def _isfullcopytraceable(repo, c1, base):
    """Tell whether the full copytrace algorithm is cheap enough to use.

    That is the case when both the source (`c1`, or its first parent when
    `c1` has no revision number) and `base` are mutable, and the number of
    changesets in ``base::c1`` stays below
    `experimental.copytrace.sourcecommitlimit`.

    Returns False as soon as either context is not mutable.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        return False
    limit = repo.ui.configint(b'experimental', b'copytrace.sourcecommitlimit')
    span = repo.revs(b'%d::%d', base.rev(), c1.rev())
    return len(span) < limit

722

732

723

733

724

def _checksinglesidecopies(

734

def _checksinglesidecopies(

725

src, dsts1, m1, m2, mb, c2, base, copy, renamedelete

735

src, dsts1, m1, m2, mb, c2, base, copy, renamedelete

726

):

736

):

727

if src not in m2:

737

if src not in m2:

728

# deleted on side 2

738

# deleted on side 2

729

if src not in m1:

739

if src not in m1:

730

# renamed on side 1, deleted on side 2

740

# renamed on side 1, deleted on side 2

731

renamedelete[src] = dsts1

741

renamedelete[src] = dsts1

732

elif src not in mb:

742

elif src not in mb:

733

# Work around the "short-circuit to avoid issues with merge states"

743

# Work around the "short-circuit to avoid issues with merge states"

734

# thing in pathcopies(): pathcopies(x, y) can return a copy where the

744

# thing in pathcopies(): pathcopies(x, y) can return a copy where the

735

# destination doesn't exist in y.

745

# destination doesn't exist in y.

736

pass

746

pass

737

elif mb[src] != m2[src] and not _related(c2[src], base[src]):

747

elif mb[src] != m2[src] and not _related(c2[src], base[src]):

738

return

748

return

739

elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):

749

elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):

740

# modified on side 2

750

# modified on side 2

741

for dst in dsts1:

751

for dst in dsts1:

742

copy[dst] = src

752

copy[dst] = src

743

753

744

754

745

class branch_copies(object):
    """Copy information for one side of a merge/graft.

    "copy" maps destination name -> source name, where the source is in
    c1 and the destination is in c2 or vice-versa.

    "movewithdir" maps source name -> destination name, where the file at
    source is present in one context but not the other and must be moved
    to destination by the merge process, because the other context moved
    the directory it is in.

    "renamedelete" maps source name -> list of destination names for
    files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" maps detected source dir -> destination dir renames. This is
    needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # every omitted mapping gets its own fresh dict
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        template = '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>'
        fields = (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
        return template % fields

779

789

780

790

781

def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm.

    Collects the copies from `base` to each of `c1` and `c2` with
    pathcopies(), then classifies every copy source according to what
    happened to it on each side.

    This is pretty slow when a lot of changesets are involved but will
    track all the copies.

    Returns ``(branch_copies1, branch_copies2, diverge)``, where the two
    `branch_copies` describe the `c1` and `c2` sides and `diverge` maps a
    source name to the sorted list of its divergent rename destinations.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not copies1 and not copies2:
        return branch_copies(), branch_copies(), {}

    def invert(copies):
        # {dst: src} -> {src: [dst, ...]}
        bysource = {}
        for dst, src in copies.items():
            bysource.setdefault(src, []).append(dst)
        return bysource

    inversecopies1 = invert(copies1)
    inversecopies2 = invert(copies2)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            kept1 = src in m1
            kept2 = src in m2
            if kept1 != kept2:
                # TODO: Handle cases where it was renamed on one side and
                # copied on the other side
                continue
            dsts1 = set(dsts1)
            dsts2 = set(dsts2)
            shared = dsts1 & dsts2
            # If there's some overlap in the rename destinations, we
            # consider it not divergent. For example, if side 1 copies 'a'
            # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
            # and 'd' and deletes 'a'.
            for dst in shared:
                copy1[dst] = src
                copy2[dst] = src
            if not shared and not kept1:
                # renamed on both sides, to disjoint destinations
                diverge[src] = sorted(dsts1 | dsts2)
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    if repo.ui.debugflag:
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        renamedeleteset = set()
        for renamedelete in (renamedelete1, renamedelete2):
            for dsts in renamedelete.values():
                renamedeleteset.update(dsts)

        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b" on %s side:\n" % side)
            for f in sorted(copies):
                marks = []
                if f in copy1 or f in copy2:
                    marks.append(b"*")
                if f in divergeset:
                    marks.append(b"!")
                if f in renamedeleteset:
                    marks.append(b"%")
                note = b"".join(marks)
                repo.ui.debug(
                    b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    # a directory move found on one side tells which files added on the
    # *other* side have to move along, hence the crossed names
    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge

904

914

905

915

906

def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
    """Detect directory moves and the added files that should follow them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
              merge.manifestmerge() won't care about
    addedfiles: added files on the other side (compared to ctx)

    Returns a ``(dirmove, movewithdir)`` pair of dicts. ``dirmove`` maps a
    source directory prefix to its destination prefix (both ending in
    ``/``); ``movewithdir`` maps an added file to the path it gets once its
    directory's move is applied. Both are empty when no directory move was
    found.
    """
    known_dirs = ctx.dirs()
    rejected = set()
    dirmove = {}

    # A directory counts as moved when every copy leaving it lands in one
    # single destination directory and the move was complete.
    for dst, src in pycompat.iteritems(fullcopy):
        srcdir = pathutil.dirname(src)
        dstdir = pathutil.dirname(dst)
        if srcdir in rejected:
            # this source directory was already ruled out
            continue
        if srcdir in known_dirs and dstdir in known_dirs:
            # both directories still exist in ctx: not a full move
            rejected.add(srcdir)
            continue
        if dirmove.get(srcdir, dstdir) != dstdir:
            # same source directory, two different destinations
            rejected.add(srcdir)
            continue
        # still a plausible directory move
        dirmove[srcdir] = dstdir

    # A directory may have been recorded before it was ruled out.
    for srcdir in rejected:
        dirmove.pop(srcdir, None)
    del known_dirs, rejected

    if not dirmove:
        return {}, {}

    # Append a trailing slash so the keys can be used as path prefixes.
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for srcprefix, dstprefix in pycompat.iteritems(dirmove):
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n"
            % (srcprefix, dstprefix)
        )

    movewithdir = {}
    # Match added files that are not copies against the directory moves.
    for f in addedfiles:
        if f in fullcopy:
            continue
        for srcprefix in dirmove:
            if not f.startswith(srcprefix):
                continue
            # the file was added inside a moved directory: relocate it
            moved = dirmove[srcprefix] + f[len(srcprefix) :]
            if moved not in copy:
                movewithdir[f] = moved
                repo.ui.debug(
                    b" pending file src: '%s' -> dst: '%s'\n" % (f, moved)
                )
            # only the first matching prefix is considered
            break

    return dirmove, movewithdir

969

979

970

980

971

def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
                    (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If a merge is involved it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns a 3-tuple: the branch_copies built from the copies found for the
    c1 side, the branch_copies built from the copies found for the c2 side,
    and an empty dict (this heuristic reports no divergent renames).
    """

    # A context whose rev() is None is replaced by its first parent.
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # Walk first parents from c2 back to base, collecting touched files.
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # Keep only the base..c2 copies whose source still exists in c1.
    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = [
        f for f in changedfiles if f not in m1 and f in base and f in c2
    ]

    copies1 = {}
    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        # Index the files that c1 has and base lacks by basename and by
        # directory name.
        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (the value does not depend on the file, so read it only once)
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}

1078

1088

1079

1089

1080

def _related(f1, f2):
    """Report whether filectx objects f1 and f2 share a common ancestor.

    Both ancestor chains are walked back in lockstep, always advancing the
    side with the higher linkrev, until the two sides sit on the same
    linkrev; the files are related if they are equal at that point.

    A workingfilectx has a linkrev() of None, which cannot take part in the
    integer comparison, so such an input is first replaced by its first
    ancestor (only the initial f1 and f2 can be workingfilectx's).
    """
    if f1 == f2:
        # identical file contexts are trivially related
        return True

    walk1 = f1.ancestors()
    walk2 = f2.ancestors()
    try:
        rev1, rev2 = f1.linkrev(), f2.linkrev()

        # step off any context that has no linkrev yet
        if rev1 is None:
            f1 = next(walk1)
        if rev2 is None:
            f2 = next(walk2)

        while True:
            rev1, rev2 = f1.linkrev(), f2.linkrev()
            if rev1 > rev2:
                f1 = next(walk1)
                continue
            if rev2 > rev1:
                f2 = next(walk2)
                continue
            # same linkrev on both sides: related only if the same file
            return f1 == f2
    except StopIteration:
        # one chain ran out before the two sides met
        return False

1111

1121

1112

1122

1113

def graftcopies(wctx, ctx, base):

1123

def graftcopies(wctx, ctx, base):

1114

"""reproduce copies between base and ctx in the wctx

1124

"""reproduce copies between base and ctx in the wctx

1115

1125

1116

Unlike mergecopies(), this function will only consider copies between base

1126

Unlike mergecopies(), this function will only consider copies between base

1117

and ctx; it will ignore copies between base and wctx. Also unlike

1127

and ctx; it will ignore copies between base and wctx. Also unlike

1118

mergecopies(), this function will apply copies to the working copy (instead

1128

mergecopies(), this function will apply copies to the working copy (instead

1119

of just returning information about the copies). That makes it cheaper

1129

of just returning information about the copies). That makes it cheaper

1120

(especially in the common case of base==ctx.p1()) and useful also when

1130

(especially in the common case of base==ctx.p1()) and useful also when

1121

experimental.copytrace=off.

1131

experimental.copytrace=off.

1122

1132

1123

merge.update() will have already marked most copies, but it will only

1133

merge.update() will have already marked most copies, but it will only

1124

mark copies if it thinks the source files are related (see

1134

mark copies if it thinks the source files are related (see

1125

merge._related()). It will also not mark copies if the file wasn't modified

1135

merge._related()). It will also not mark copies if the file wasn't modified

1126

on the local side. This function adds the copies that were "missed"

1136

on the local side. This function adds the copies that were "missed"

1127

by merge.update().

1137

by merge.update().

1128

"""

1138

"""

1129

new_copies = pathcopies(base, ctx)

1139

new_copies = pathcopies(base, ctx)

1130

_filter(wctx.p1(), wctx, new_copies)

1140

_filter(wctx.p1(), wctx, new_copies)

1131

for dst, src in pycompat.iteritems(new_copies):

1141

for dst, src in pycompat.iteritems(new_copies):

1132

wctx[dst].markcopied(src)

1142

wctx[dst].markcopied(src)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import collections
             import os
             from .i18n import _
             from . import (
                 match as matchmod,
                 node,
                 pathutil,
+                policy,
                 pycompat,
                 util,
             )
             from .utils import stringutil
             from .revlogutils import flagutil
+            rustmod = policy.importrust("copy_tracing")
             def _filter(src, dst, t):
                 """filters out invalid copies after chaining"""
                 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
                 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
                 # in the following table (not including trivial cases). For example, case 2
                 # is where a file existed in 'src' and remained under that name in 'mid' and
                 # then was renamed between 'mid' and 'dst'.
                 #
                 # case src mid dst result
                 #   1   x   y   -    -
                 #   2   x   y   y   x->y
                 #   3   x   y   x    -
                 #   4   x   y   z   x->z
                 #   5   -   x   y    -
                 #   6   x   x   y   x->y
                 #
                 # _chain() takes care of chaining the copies in 'a' and 'b', but it
                 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
                 # between 5 and 6, so it includes all cases in its result.
                 # Cases 1, 3, and 5 are then removed by _filter().
                 for k, v in list(t.items()):
                     # remove copies from files that didn't exist
                     if v not in src:
                         del t[k]
                     # remove criss-crossed copies
                     elif k in src and v in dst:
                         del t[k]
                     # remove copies to files that were then removed
                     elif k not in dst:
                         del t[k]
             def _chain(prefix, suffix):
                 """compose the copies in 'prefix' with the later copies in 'suffix'
                 Every destination of 'suffix' is mapped to the source 'prefix' records
                 for its own source, or to that source unchanged when 'prefix' has no
                 entry for it. Entries of 'prefix' are kept unless overridden."""
                 chained = prefix.copy()
                 chained.update(
                     (dest, prefix.get(middle, middle))
                     for dest, middle in pycompat.iteritems(suffix)
                 )
                 return chained
             def _tracefile(fctx, am, basemf):
                 """return file context that is the ancestor of fctx present in ancestor
                 manifest am
                 Note: we used to try and stop after a given limit, however checking if that
                 limit is reached turned out to be very expensive. we are better off
                 disabling that feature."""
                 for f in fctx.ancestors():
                     path = f.path()
                     if am.get(path, None) == f.filenode():
                         return path
                     if basemf and basemf.get(path, None) == f.filenode():
                         return path
             def _dirstatecopies(repo, match=None):
                 ds = repo.dirstate
                 c = ds.copies().copy()
                 for k in list(c):
                     if ds[k] not in b'anm' or (match and not match(k)):
                         del c[k]
                 return c
             def _computeforwardmissing(a, b, match=None):
                 """Computes which files are in b but not a.
                 This is its own function so extensions can easily wrap this call to see what
                 files _forwardcopies is about to process.
                 """
                 ma = a.manifest()
                 mb = b.manifest()
                 return mb.filesnotin(ma, match=match)
             def usechangesetcentricalgo(repo):
                 """Tell whether the changeset-centric copy algorithms should be used
                 True when the repository stores copies in changeset sidedata, or when
                 ``experimental.copies.read-from`` is ``changeset-only`` or
                 ``compatibility``."""
                 if repo.filecopiesmode != b'changeset-sidedata':
                     return repo.ui.config(b'experimental', b'copies.read-from') in (
                         b'changeset-only',
                         b'compatibility',
                     )
                 return True
             def _committedforwardcopies(a, b, base, match):
                 """Like _forwardcopies(), but b.rev() cannot be None (working copy)
                 Returns a {destination: source} dict. When the repository uses the
                 changeset-centric algorithms the work is delegated to
                 _changesetforwardcopies() (``base`` is not used on that path).
                 Otherwise every file of ``b`` missing from ``a`` is traced back
                 through its file ancestors with _tracefile(), looking for a version
                 recorded in the manifest of ``a`` or, when ``base`` is not None, in
                 the manifest of ``base``.
                 """
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 repo = a._repo
                 if usechangesetcentricalgo(repo):
                     return _changesetforwardcopies(a, b, match)
                 # debug output needs both the debug flag and devel.debug.copies
                 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
                 dbg = repo.ui.debug
                 if debug:
                     dbg(b'debug.copies:    looking into rename from %s to %s\n' % (a, b))
                 am = a.manifest()
                 basemf = None if base is None else base.manifest()
                 # find where new files came from
                 # we currently don't try to find where old files went, too expensive
                 # this means we can miss a case like 'hg rm b; hg cp a b'
                 cm = {}
                 # Computing the forward missing is quite expensive on large manifests, since
                 # it compares the entire manifests. We can optimize it in the common use
                 # case of computing what copies are in a commit versus its parent (like
                 # during a rebase or histedit). Note, we exclude merge commits from this
                 # optimization, since the ctx.files() for a merge commit is not correct for
                 # this comparison.
                 forwardmissingmatch = match
                 if b.p1() == a and b.p2().node() == node.nullid:
                     # b is a non-merge child of a: only the files b touched matter
                     filesmatcher = matchmod.exact(b.files())
                     forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
                 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
                 # computed once and attached to every file context traced below
                 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
                 if debug:
                     dbg(b'debug.copies:      missing files to search: %d\n' % len(missing))
                 for f in sorted(missing):
                     if debug:
                         dbg(b'debug.copies:        tracing file: %s\n' % f)
                     fctx = b[f]
                     fctx._ancestrycontext = ancestrycontext
                     if debug:
                         # 'start' only exists (and is only read) in debug mode
                         start = util.timer()
                     opath = _tracefile(fctx, am, basemf)
                     if opath:
                         if debug:
                             dbg(b'debug.copies:          rename of: %s\n' % opath)
                         cm[f] = opath
                     if debug:
                         dbg(
                             b'debug.copies:          time: %f seconds\n'
                             % (util.timer() - start)
                         )
                 return cm
             def _revinfo_getter(repo):
                 """returns a function that returns the following data given a <rev>
                 * p1: revision number of first parent
                 * p2: revision number of second parent
                 * changes: a ChangingFiles object, or None when the revision is not
                   flagged as carrying copy information
                 """
                 changelog = repo.changelog
                 parentrevs = changelog.parentrevs
                 revflags = changelog.flags
                 has_copies_info = flagutil.REVIDX_HASCOPIESINFO
                 read_revision = changelog.changelogrevision
                 # A small cache to avoid doing the work twice for merges
                 #
                 # In the vast majority of cases, if we are asked about a revision for
                 # one of its parents, we will later be asked about it for the other
                 # one. So it makes sense to keep the information around when reaching
                 # the first parent of a merge and to drop it once it has been provided
                 # for the second parent.
                 #
                 # There are cases where only one parent of the merge will be walked.
                 # It happens when the "destination" of the copy tracing descends from
                 # a new root, not common with the "source". In that case, we will only
                 # walk through merge parents that are descendants of changesets common
                 # between "source" and "destination".
                 #
                 # With the current implementation, if such changesets have copy
                 # information, we'll keep them in memory until the end of
                 # _changesetforwardcopies. We don't expect the case to be frequent
                 # enough to matter.
                 #
                 # In addition, it would be possible to reach a pathological case, where
                 # many first parents are met before any second parent is reached. In
                 # that case the cache could grow. If this ever becomes an issue, one
                 # can safely introduce a maximum cache size. This would trade extra
                 # CPU/IO time to save memory.
                 pending_merges = {}
                 def revinfo(rev):
                     p1, p2 = parentrevs(rev)
                     cached = pending_merges.pop(rev, None)
                     if cached is not None:
                         return cached
                     if revflags(rev) & has_copies_info:
                         changes = read_revision(rev).changes
                     else:
                         changes = None
                     info = (p1, p2, changes)
                     if not (p1 == node.nullrev or p2 == node.nullrev):
                         # XXX some case we over cache, IGNORE
                         pending_merges[rev] = info
                     return info
                 return revinfo
             def cached_is_ancestor(is_ancestor):
                 """wrap ``is_ancestor`` with a memoizing front-end
                 The returned callable answers directly when ``anc > desc`` (False) or
                 ``anc == desc`` (True); other answers are remembered per (anc, desc)
                 pair. A stored None is treated as "not cached yet"."""
                 known = {}
                 def _is_ancestor(anc, desc):
                     if anc == desc:
                         return True
                     if anc > desc:
                         return False
                     pair = (anc, desc)
                     answer = known.get(pair)
                     if answer is not None:
                         return answer
                     answer = is_ancestor(anc, desc)
                     known[pair] = answer
                     return answer
                 return _is_ancestor
             def _changesetforwardcopies(a, b, match):
                 """compute the copies from ``a`` to ``b`` out of changeset-level data
                 Builds the list of revisions to walk and the parent -> children
                 mapping between them, then hands both to the combining function that
                 matches the repository's copy storage mode. An empty dict is returned
                 when ``a`` is the null revision or the same revision as ``b``, and
                 when no common revision to track copies from is found.
                 """
                 if a.rev() in (node.nullrev, b.rev()):
                     return {}
                 repo = a.repo().unfiltered()
                 # parent revision -> list of its children among the missing revisions
                 children = {}
                 cl = repo.changelog
                 isancestor = cached_is_ancestor(cl.isancestorrev)
                 # as returned by findmissingrevs() for common=[a] and heads=[b]
                 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
                 mrset = set(missingrevs)
                 # non-null parents of missing revisions that are not missing themselves
                 roots = set()
                 for r in missingrevs:
                     for p in cl.parentrevs(r):
                         if p == node.nullrev:
                             continue
                         if p not in children:
                             children[p] = [r]
                         else:
                             children[p].append(r)
                         if p not in mrset:
                             roots.add(p)
                 if not roots:
                     # no common revision to track copies from
                     return {}
                 min_root = min(roots)
                 from_head = set(
                     cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
                 )
                 # revisions to visit: what reachableroots() returned restricted to the
                 # missing revisions, plus the roots, minus the target itself
                 iterrevs = set(from_head)
                 iterrevs &= mrset
                 iterrevs.update(roots)
                 iterrevs.remove(b.rev())
                 revs = sorted(iterrevs)
                 # pick the revinfo getter / combiner pair matching the storage mode
                 if repo.filecopiesmode == b'changeset-sidedata':
                     revinfo = _revinfo_getter(repo)
                     return _combine_changeset_copies(
                         revs, children, b.rev(), revinfo, match, isancestor
                     )
                 else:
                     revinfo = _revinfo_getter_extra(repo)
                     return _combine_changeset_copies_extra(
                         revs, children, b.rev(), revinfo, match, isancestor
                     )
             def _combine_changeset_copies(
                 revs, children, targetrev, revinfo, match, isancestor
             ):
                 """combine the copies information for each item of iterrevs
                 revs: sorted iterable of revision to visit
                 children: a {parent: [children]} mapping.
                 targetrev: the final copies destination revision (not in iterrevs)
                 revinfo(rev): a function that return (p1, p2, changes); ``changes`` is
                               None or an object providing ``copied_from_p1``,
                               ``copied_from_p2`` and ``removed``
                 match: a matcher
                 isancestor(low_rev, high_rev): callable telling whether ``low_rev`` is
                                                an ancestor of ``high_rev``
                 It returns the aggregated copies information for `targetrev`, as a
                 {destination: source} dict.
                 """
                 alwaysmatch = match.always()
                 # the Rust implementation does not take a matcher, so it can only be
                 # used when nothing has to be filtered out
                 if rustmod is not None and alwaysmatch:
                     return rustmod.combine_changeset_copies(
                         list(revs), children, targetrev, revinfo, isancestor
                     )
                 # revision -> {destination: (revision that set the entry, source)}
                 # a source of None records that the destination was removed
                 all_copies = {}
                 for r in revs:
                     copies = all_copies.pop(r, None)
                     if copies is None:
                         # this is a root
                         copies = {}
                     for c in children[r]:
                         p1, p2, changes = revinfo(c)
                         childcopies = {}
                         if r == p1:
                             parent = 1
                             if changes is not None:
                                 childcopies = changes.copied_from_p1
                         else:
                             assert r == p2
                             parent = 2
                             if changes is not None:
                                 childcopies = changes.copied_from_p2
                         if not alwaysmatch:
                             childcopies = {
                                 dst: src for dst, src in childcopies.items() if match(dst)
                             }
                         newcopies = copies
                         if childcopies:
                             newcopies = copies.copy()
                             for dest, source in pycompat.iteritems(childcopies):
                                 # chain with a copy already known for the source
                                 prev = copies.get(source)
                                 if prev is not None and prev[1] is not None:
                                     source = prev[1]
                                 newcopies[dest] = (c, source)
                             assert newcopies is not copies
                         if changes is not None:
                             for f in changes.removed:
                                 if f in newcopies:
                                     if newcopies is copies:
                                         # copy on write to avoid affecting potential other
                                         # branches.  when there are no other branches, this
                                         # could be avoided.
                                         newcopies = copies.copy()
                                     newcopies[f] = (c, None)
                         othercopies = all_copies.get(c)
                         if othercopies is None:
                             all_copies[c] = newcopies
                         else:
                             # we are the second parent to work on c, we need to merge our
                             # work with the other.
                             #
                             # In case of conflict, parent 1 take precedence over parent 2.
                             # This is an arbitrary choice made anew when implementing
                             # changeset based copies. It was made without regards with
                             # potential filelog related behavior.
                             #
                             # _merge_copies_dict() updates its first argument in place.
                             # That dict may be the very object stored for (or about to
                             # be handed to) other children of one of the parents, so it
                             # is copied first to keep branches independent.
                             if parent == 1:
                                 merged = othercopies.copy()
                                 _merge_copies_dict(
                                     merged, newcopies, isancestor, changes
                                 )
                             else:
                                 merged = newcopies
                                 if merged is copies:
                                     merged = copies.copy()
                                 _merge_copies_dict(
                                     merged, othercopies, isancestor, changes
                                 )
                             all_copies[c] = merged
                 # drop the bookkeeping: keep only destinations that still have a source
                 return {
                     dest: source
                     for dest, (tt, source) in all_copies[targetrev].items()
                     if source is not None
                 }
             def _merge_copies_dict(minor, major, isancestor, changes):
                 """merge two copies-mapping together, minor and major
                 In case of conflict, value from "major" will be picked.
                 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
                                                     ancestors of `high_rev`,
                 - `ismerged(path)`: callable return True if `path` have been merged in the
                                     current revision,
                 """
                 for dest, value in major.items():
                     other = minor.get(dest)
                     if other is None:
                         minor[dest] = value
                     else:
                         new_tt = value[0]
                         other_tt = other[0]
                         if value[1] == other[1]:
                             continue
                         # content from "major" wins, unless it is older
                         # than the branch point or there is a merge
                         if new_tt == other_tt:
                             minor[dest] = value
                         elif (
                             changes is not None
                             and value[1] is None
                             and dest in changes.salvaged
                         ):
                             pass
                         elif (
                             changes is not None
                             and other[1] is None
                             and dest in changes.salvaged
                         ):
                             minor[dest] = value
                         elif changes is not None and dest in changes.merged:
                             minor[dest] = value
                         elif not isancestor(new_tt, other_tt):
                             if value[1] is not None:
                                 minor[dest] = value
                             elif isancestor(other_tt, new_tt):
                                 minor[dest] = value
             def _revinfo_getter_extra(repo):
                 """return a function that return multiple data given a <rev>
                 * p1: revision number of first parent
                 * p2: revision number of second parent
                 * p1copies: mapping of copies from p1
                 * p2copies: mapping of copies from p2
                 * removed: a list of removed files
                 * ismerged: a callback to know if file was merged in that revision
                 """
                 changelog = repo.changelog
                 parentrevs = changelog.parentrevs
                 def get_ismerged(rev):
                     ctx = repo[rev]
                     def ismerged(path):
                         if path not in ctx.files():
                             return False
                         fctx = ctx[path]
                         fileparents = fctx._filelog.parents(fctx._filenode)
                         # merged means the file revision has two non-null parents
                         real_parents = sum(1 for n in fileparents if n != node.nullid)
                         return real_parents >= 2
                     return ismerged
                 def revinfo(rev):
                     p1, p2 = parentrevs(rev)
                     ctx = repo[rev]
                     p1copies, p2copies = ctx._copies
                     removed = ctx.filesremoved()
                     return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
                 return revinfo
             def _combine_changeset_copies_extra(
                 revs, children, targetrev, revinfo, match, isancestor
             ):
                 """version of `_combine_changeset_copies` that works with the Google
                 specific "extra" based storage for copy information
                 revs: sorted iterable of revision to visit
                 children: a {parent: [children]} mapping.
                 targetrev: the final copies destination revision (not in revs)
                 revinfo(rev): a function that return
                               (p1, p2, p1copies, p2copies, removed, ismerged)
                 match: a matcher
                 isancestor(low_rev, high_rev): callable telling whether ``low_rev`` is
                                                an ancestor of ``high_rev``
                 It returns the aggregated {destination: source} copies for
                 `targetrev`."""
                 # revision -> {destination: (revision that set the entry, source)}
                 # a source of None records that the destination was removed
                 all_copies = {}
                 alwaysmatch = match.always()
                 for r in revs:
                     copies = all_copies.pop(r, None)
                     if copies is None:
                         # this is a root
                         copies = {}
                     for c in children[r]:
                         p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
                         if r == p1:
                             parent = 1
                             childcopies = p1copies
                         else:
                             assert r == p2
                             parent = 2
                             childcopies = p2copies
                         if not alwaysmatch:
                             childcopies = {
                                 dst: src for dst, src in childcopies.items() if match(dst)
                             }
                         newcopies = copies
                         if childcopies:
                             newcopies = copies.copy()
                             for dest, source in pycompat.iteritems(childcopies):
                                 # chain with a copy already known for the source
                                 prev = copies.get(source)
                                 if prev is not None and prev[1] is not None:
                                     source = prev[1]
                                 newcopies[dest] = (c, source)
                             assert newcopies is not copies
                         for f in removed:
                             if f in newcopies:
                                 if newcopies is copies:
                                     # copy on write to avoid affecting potential other
                                     # branches.  when there are no other branches, this
                                     # could be avoided.
                                     newcopies = copies.copy()
                                 newcopies[f] = (c, None)
                         othercopies = all_copies.get(c)
                         if othercopies is None:
                             all_copies[c] = newcopies
                         else:
                             # we are the second parent to work on c, we need to merge our
                             # work with the other.
                             #
                             # In case of conflict, parent 1 take precedence over parent 2.
                             # This is an arbitrary choice made anew when implementing
                             # changeset based copies. It was made without regards with
                             # potential filelog related behavior.
                             #
                             # _merge_copies_dict_extra() updates its first argument in
                             # place. That dict may be the very object stored for (or
                             # about to be handed to) other children of one of the
                             # parents, so it is copied first to keep branches
                             # independent.
                             if parent == 1:
                                 merged = othercopies.copy()
                                 _merge_copies_dict_extra(
                                     merged, newcopies, isancestor, ismerged
                                 )
                             else:
                                 merged = newcopies
                                 if merged is copies:
                                     merged = copies.copy()
                                 _merge_copies_dict_extra(
                                     merged, othercopies, isancestor, ismerged
                                 )
                             all_copies[c] = merged
                 # drop the bookkeeping: keep only destinations that still have a source
                 return {
                     dest: source
                     for dest, (tt, source) in all_copies[targetrev].items()
                     if source is not None
                 }
             def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
                 """version of `_merge_copies_dict` that works with the Google
                 specific "extra" based storage for copy information"""
                 for dest, value in major.items():
                     other = minor.get(dest)
                     if other is None:
                         minor[dest] = value
                     else:
                         new_tt = value[0]
                         other_tt = other[0]
                         if value[1] == other[1]:
                             continue
                         # content from "major" wins, unless it is older
                         # than the branch point or there is a merge
                         if (
                             new_tt == other_tt
                             or not isancestor(new_tt, other_tt)
                             or ismerged(dest)
                         ):
                             minor[dest] = value
             def _forwardcopies(a, b, base=None, match=None):
                 """find {dst@b: src@a} copy mapping where a is an ancestor of b
                 ``base`` defaults to ``a``; ``match`` is first narrowed through the
                 repository's narrowmatch()."""
                 if base is None:
                     base = a
                 match = a.repo().narrowmatch(match)
                 if b.rev() is not None:
                     return _committedforwardcopies(a, b, base, match)
                 # b is the working copy: trace up to its first parent, then chain the
                 # copies recorded in the dirstate on top
                 committed = _committedforwardcopies(a, b.p1(), base, match)
                 return _chain(committed, _dirstatecopies(b._repo, match))
             def _backwardrenames(a, b, match):
                 """return the renames from ``a`` back to ``b`` as a {src: dst} reversal
                 of the forward copies from ``b`` to ``a``
                 Nothing is reported when ``experimental.copytrace`` is ``off``."""
                 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
                     return {}
                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
                 # We don't want to pass in "match" here, since that would filter
                 # the destination by it. Since we're reversing the copies, we want
                 # to filter the source instead.
                 forward = _forwardcopies(b, a)
                 renames = {}
                 for dest, source in sorted(pycompat.iteritems(forward)):
                     # a source still present in 'a' was copied, not renamed
                     if (not match or match(source)) and source not in a:
                         renames[source] = dest
                 return renames
             def pathcopies(x, y, match=None):
                 """find {dst@y: src@x} copy mapping for directed compare
                 Returns an empty dict when ``x`` and ``y`` are equal or when either of
                 them is falsy. Otherwise the search mode depends on how the two
                 contexts relate:
                 - dirstate: ``y`` has no revision number and ``x`` is its first parent,
                 - forward: ``x`` is the ancestor returned by ``y.ancestor(x)``,
                 - backward: ``y`` is that ancestor,
                 - combined: anything else; backward renames from ``x`` to the ancestor
                   are chained with forward copies from the ancestor to ``y``.
                 Except in dirstate mode, the result goes through _filter() before
                 being returned.
                 """
                 repo = x._repo
                 # debug output needs both the debug flag and devel.debug.copies
                 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
                 if debug:
                     repo.ui.debug(
                         b'debug.copies: searching copies from %s to %s\n' % (x, y)
                     )
                 if x == y or not x or not y:
                     return {}
                 if y.rev() is None and x == y.p1():
                     if debug:
                         repo.ui.debug(b'debug.copies: search mode: dirstate\n')
                     # short-circuit to avoid issues with merge states
                     return _dirstatecopies(repo, match)
                 a = y.ancestor(x)
                 if a == x:
                     if debug:
                         repo.ui.debug(b'debug.copies: search mode: forward\n')
                     copies = _forwardcopies(x, y, match=match)
                 elif a == y:
                     if debug:
                         repo.ui.debug(b'debug.copies: search mode: backward\n')
                     copies = _backwardrenames(x, y, match=match)
                 else:
                     if debug:
                         repo.ui.debug(b'debug.copies: search mode: combined\n')
                     # x is only passed as base when the ancestor is not the null
                     # revision
                     base = None
                     if a.rev() != node.nullrev:
                         base = x
                     copies = _chain(
                         _backwardrenames(x, a, match=match),
                         _forwardcopies(a, y, base, match=match),
                     )
                 _filter(x, y, copies)
                 return copies
             def mergecopies(repo, c1, c2, base):
                 """Find the moves and copies between c1 and c2 relevant to a merge.

                 'base' is used as the merge base.

                 Commands such as rebase, merge and unshelve rely on copytracing to
                 merge files that were moved/copied in one merge parent and modified
                 in the other. For example:

                 o          ---> 4 another commit
                 |
                 |   o      ---> 3 commit that modifies a.txt
                 |  /
                 o /        ---> 2 commit that moves a.txt to b.txt
                 |/
                 o          ---> 1 merge base

                 When rebasing revision 3 onto revision 4 there is no a.txt left in
                 revision 4, so with copytracing disabled the user is told:

                 ```other changed <file> which local deleted```

                 Returns a 3-tuple made of:

                 - two branch_copies instances,
                 - "diverge", a mapping of source name -> list of destination names
                   for divergent renames.

                 The copytracing algorithm that runs is selected by configuration
                 (`experimental.copytrace`).
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return branch_copies(), branch_copies(), {}

                 narrowmatch = c1.repo().narrowmatch()

                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     wdircopies = _dirstatecopies(repo, narrowmatch)
                     return branch_copies(wdircopies), branch_copies(), {}

                 # Copy trace disabling is deliberately checked after the
                 # parent -> working dir case above: that case is required for a
                 # simple copy to be kept across a rebase.
                 mode = repo.ui.config(b'experimental', b'copytrace')
                 if stringutil.parsebool(mode) is False:
                     # stringutil.parsebool() returns None for values it cannot
                     # parse, so copytracing stays on for those
                     return branch_copies(), branch_copies(), {}

                 if usechangesetcentricalgo(repo):
                     # The heuristics don't make sense when we need changeset-centric
                     # algos
                     return _fullcopytracing(repo, c1, c2, base)

                 # Even with heuristics requested, do full copytracing when
                 # _isfullcopytraceable() allows it: it will be fast enough and also
                 # covers the copies the heuristics could miss.
                 if mode == b'heuristics' and not _isfullcopytraceable(repo, c1, base):
                     return _heuristicscopytracing(repo, c1, c2, base)
                 return _fullcopytracing(repo, c1, c2, base)
             def _isfullcopytraceable(repo, c1, base):
                 """Checks that if base, source and destination are all no-public branches,
                 if yes let's use the full copytrace algorithm for increased capabilities
                 since it will be fast enough.
                 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
                 number of changesets from c1 to base such that if number of changesets are
                 more than the limit, full copytracing algorithm won't be used.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c1.mutable() and base.mutable():
                     sourcecommitlimit = repo.ui.configint(
                         b'experimental', b'copytrace.sourcecommitlimit'
                     )
                     commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
                     return commits < sourcecommitlimit
                 return False
             def _checksinglesidecopies(
                 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
             ):
                 if src not in m2:
                     # deleted on side 2
                     if src not in m1:
                         # renamed on side 1, deleted on side 2
                         renamedelete[src] = dsts1
                 elif src not in mb:
                     # Work around the "short-circuit to avoid issues with merge states"
                     # thing in pathcopies(): pathcopies(x, y) can return a copy where the
                     # destination doesn't exist in y.
                     pass
                 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
                     return
                 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
                     # modified on side 2
                     for dst in dsts1:
                         copy[dst] = src
             class branch_copies(object):
                 """Copy information gathered for one side of a merge/graft.

                 "copy" maps destination name -> source name, where the source is in
                 c1 and the destination in c2, or vice-versa.

                 "movewithdir" maps source name -> destination name for files that
                 exist in one context but not the other and that the merge process
                 has to move, because the other context moved their directory.

                 "renamedelete" maps source name -> list of destination names for
                 files deleted in c1 that were renamed in c2, or vice-versa.

                 "dirmove" maps detected source dir -> destination dir renames. It is
                 needed to handle changes to new files previously grafted into
                 renamed directories.
                 """

                 def __init__(
                     self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
                 ):
                     # every omitted mapping gets its own fresh dict; a mapping that
                     # is passed in (even an empty one) is stored as is
                     if copy is None:
                         copy = {}
                     if renamedelete is None:
                         renamedelete = {}
                     if dirmove is None:
                         dirmove = {}
                     if movewithdir is None:
                         movewithdir = {}
                     self.copy = copy
                     self.renamedelete = renamedelete
                     self.dirmove = dirmove
                     self.movewithdir = movewithdir

                 def __repr__(self):
                     template = '<branch_copies\n  copy=%r\n  renamedelete=%r\n  dirmove=%r\n  movewithdir=%r\n>'
                     fields = (
                         self.copy,
                         self.renamedelete,
                         self.dirmove,
                         self.movewithdir,
                     )
                     return template % fields
             def _fullcopytracing(repo, c1, c2, base):
                 """Full copytracing: classify every copy made since the merge base.

                 The copies from base to c1 and from base to c2 are computed with
                 pathcopies() and then sorted into copies to merge, divergent
                 renames, renamed-and-deleted files and directory renames.

                 This is pretty slow when a lot of changesets are involved but it
                 tracks all the copies.

                 Returns a 3-tuple: a branch_copies built from the c1-side results, a
                 branch_copies built from the c2-side results, and the "diverge"
                 mapping of source name -> sorted list of divergent destinations.
                 """

                 def bysource(copies):
                     # turn a {dst: src} mapping into {src: [dst, ...]}
                     grouped = {}
                     for dst, src in copies.items():
                         grouped.setdefault(src, []).append(dst)
                     return grouped

                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 mb = base.manifest()

                 copies1 = pathcopies(base, c1)
                 copies2 = pathcopies(base, c2)
                 if not copies1 and not copies2:
                     return branch_copies(), branch_copies(), {}

                 inversecopies1 = bysource(copies1)
                 inversecopies2 = bysource(copies2)

                 copy1, copy2 = {}, {}
                 renamedelete1, renamedelete2 = {}, {}
                 diverge = {}

                 for src in set(inversecopies1) | set(inversecopies2):
                     dsts1 = inversecopies1.get(src)
                     dsts2 = inversecopies2.get(src)
                     if dsts1 and dsts2:
                         # copied/renamed on both sides
                         gone1 = src not in m1
                         gone2 = src not in m2
                         if gone1 != gone2:
                             # TODO: Handle cases where it was renamed on one side
                             # and copied on the other side
                             continue
                         # Destinations shared by both sides are merged, whether
                         # src was renamed (gone) or merely copied (kept) on both.
                         common = set(dsts1) & set(dsts2)
                         for dst in common:
                             copy1[dst] = src
                             copy2[dst] = src
                         if gone1 and not common:
                             # Renamed on both sides without any overlap in the
                             # destinations: divergent. With some overlap (side 1
                             # copies 'a' to 'b' and 'c' and deletes 'a', side 2
                             # copies 'a' to 'c' and 'd' and deletes 'a') it is not
                             # considered divergent.
                             diverge[src] = sorted(set(dsts1) | set(dsts2))
                     elif dsts1:
                         # copied/renamed only on side 1
                         _checksinglesidecopies(
                             src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
                         )
                     elif dsts2:
                         # copied/renamed only on side 2
                         _checksinglesidecopies(
                             src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
                         )

                 # find interesting file sets from manifests
                 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
                 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
                 u1 = sorted(addedinm1 - addedinm2)
                 u2 = sorted(addedinm2 - addedinm1)

                 header = b"  unmatched files in %s"
                 for side, unmatched in ((b'local', u1), (b'other', u2)):
                     if unmatched:
                         repo.ui.debug(
                             b"%s:\n   %s\n"
                             % (header % side, b"\n   ".join(unmatched))
                         )

                 if repo.ui.debugflag:
                     # destinations to flag in the listing below
                     divergeset = {dst for dsts in diverge.values() for dst in dsts}
                     renamedeleteset = set()
                     for renamedelete in (renamedelete1, renamedelete2):
                         for dsts in renamedelete.values():
                             renamedeleteset.update(dsts)

                     repo.ui.debug(
                         b"  all copies found (* = to merge, ! = divergent, "
                         b"% = renamed and deleted):\n"
                     )
                     for side, copies in ((b"local", copies1), (b"remote", copies2)):
                         if not copies:
                             continue
                         repo.ui.debug(b"   on %s side:\n" % side)
                         for f in sorted(copies):
                             marks = []
                             if f in copy1 or f in copy2:
                                 marks.append(b"*")
                             if f in divergeset:
                                 marks.append(b"!")
                             if f in renamedeleteset:
                                 marks.append(b"%")
                             repo.ui.debug(
                                 b"    src: '%s' -> dst: '%s' %s\n"
                                 % (copies[f], f, b"".join(marks))
                             )

                 repo.ui.debug(b"  checking for directory renames\n")

                 # files added on one side are checked against the directory moves
                 # found on the other side
                 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
                 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)

                 return (
                     branch_copies(copy1, renamedelete1, dirmove1, movewithdir1),
                     branch_copies(copy2, renamedelete2, dirmove2, movewithdir2),
                     diverge,
                 )
             def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
                 """Detect moved directories and the files that have to follow them.

                 ctx: the context for one of the sides
                 copy: files copied on the same side (as ctx)
                 fullcopy: files copied on the same side (as ctx), including those
                           that merge.manifestmerge() won't care about
                 addedfiles: added files on the other side (compared to ctx)

                 Returns a (dirmove, movewithdir) pair of dicts. "dirmove" maps a
                 moved source directory to its destination directory, both with a
                 trailing slash. "movewithdir" maps an added file to the path it
                 should get under the destination directory.
                 """
                 existingdirs = ctx.dirs()
                 rejected = set()
                 candidates = {}

                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in pycompat.iteritems(fullcopy):
                     srcdir = pathutil.dirname(src)
                     dstdir = pathutil.dirname(dst)
                     if srcdir in rejected:
                         # already seen to be uninteresting
                         continue
                     if srcdir in existingdirs and dstdir in existingdirs:
                         # directory wasn't entirely moved locally
                         rejected.add(srcdir)
                     elif candidates.get(srcdir, dstdir) != dstdir:
                         # files from the same directory moved to two different places
                         rejected.add(srcdir)
                     else:
                         # looks good so far
                         candidates[srcdir] = dstdir

                 # a directory may have been recorded before it got rejected
                 for srcdir in rejected:
                     candidates.pop(srcdir, None)
                 del existingdirs, rejected

                 if not candidates:
                     return {}, {}

                 # directory prefixes carry a trailing slash for the prefix matching
                 # on file names below
                 dirmove = {}
                 for srcdir, dstdir in pycompat.iteritems(candidates):
                     srcprefix, dstprefix = srcdir + b"/", dstdir + b"/"
                     dirmove[srcprefix] = dstprefix
                     repo.ui.debug(
                         b"   discovered dir src: '%s' -> dst: '%s'\n"
                         % (srcprefix, dstprefix)
                     )

                 movewithdir = {}
                 # check unaccounted nonoverlapping files against directory moves
                 for f in addedfiles:
                     if f in fullcopy:
                         continue
                     # only the first matching moved directory is considered
                     prefix = next((d for d in dirmove if f.startswith(d)), None)
                     if prefix is None:
                         continue
                     # new file added in a directory that was moved, move it
                     df = dirmove[prefix] + f[len(prefix) :]
                     if df not in copy:
                         movewithdir[f] = df
                         repo.ui.debug(
                             b"   pending file src: '%s' -> dst: '%s'\n" % (f, df)
                         )

                 return dirmove, movewithdir
             def _heuristicscopytracing(repo, c1, c2, base):
                 """Fast copytracing using filename heuristics

                 Assumes that moves or renames are of following two types:

                 1) Inside a directory only (same directory name but different
                    filenames)
                 2) Move from one directory to another (same filenames but different
                    directory names)

                 Works only when there are no merge commits in the "source branch".
                 Source branch is commits from base up to c2 not including base.

                 If a merge is involved, or if base is not an ancestor of c2, it
                 falls back to _fullcopytracing().

                 Can be used by setting the following config:

                     [experimental]
                     copytrace = heuristics

                 In some cases the copy/move candidates found by heuristics can be
                 very large in number and that will make the algorithm slow. The
                 number of possible candidates to check can be limited by using the
                 config `experimental.copytrace.movecandidateslimit` which defaults
                 to 100.
                 """
                 # work on committed contexts: use the first parent in place of a
                 # context that has no revision number
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c2.rev() is None:
                     c2 = c2.p1()

                 changedfiles = set()
                 m1 = c1.manifest()

                 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
                     # If base is not in c2 branch, we switch to fullcopytracing
                     repo.ui.debug(
                         b"switching to full copytracing as base is not "
                         b"an ancestor of c2\n"
                     )
                     return _fullcopytracing(repo, c1, c2, base)

                 # collect the files touched on the way from c2 down to base
                 ctx = c2
                 while ctx != base:
                     if len(ctx.parents()) == 2:
                         # To keep things simple let's not handle merges
                         repo.ui.debug(b"switching to full copytracing because of merges\n")
                         return _fullcopytracing(repo, c1, c2, base)
                     changedfiles.update(ctx.files())
                     ctx = ctx.p1()

                 # copies made between base and c2 whose source still exists in c1
                 copies2 = {
                     dst: src
                     for dst, src in pycompat.iteritems(_forwardcopies(base, c2))
                     if src in m1
                 }

                 # file is missing if it isn't present in the destination, but is
                 # present in the base and present in the source.
                 # Presence in the base is important to exclude added files, presence
                 # in the source is important to exclude removed files.
                 missingfiles = [
                     f for f in changedfiles if f not in m1 and f in base and f in c2
                 ]

                 copies1 = {}
                 if not missingfiles:
                     return branch_copies(copies1), branch_copies(copies2), {}

                 # index the files c1 has and base lacks by basename and by dirname
                 bybasename = collections.defaultdict(list)
                 bydirname = collections.defaultdict(list)
                 for added in m1.filesnotin(base.manifest()):
                     addedbase = os.path.basename(added)
                     addeddir = os.path.dirname(added)
                     bybasename[addedbase].append(added)
                     bydirname[addeddir].append(added)

                 for missing in missingfiles:
                     missingbase = os.path.basename(missing)
                     missingdir = os.path.dirname(missing)
                     movecandidates = bybasename[missingbase] + bydirname[missingdir]

                     # missing is guaranteed to be present in c2, that's why
                     # c2.filectx(missing) won't fail
                     f2 = c2.filectx(missing)

                     # we can have a lot of candidates which can slow down the
                     # heuristics; this config value limits the number of candidate
                     # moves to check
                     maxcandidates = repo.ui.configint(
                         b'experimental', b'copytrace.movecandidateslimit'
                     )
                     if len(movecandidates) > maxcandidates:
                         repo.ui.status(
                             _(
                                 b"skipping copytracing for '%s', more "
                                 b"candidates than the limit: %d\n"
                             )
                             % (missing, len(movecandidates))
                         )
                         continue

                     for candidate in movecandidates:
                         # if there are a few related copies then we'll merge
                         # changes into all of them. This matches the behaviour
                         # of upstream copytracing
                         if _related(c1.filectx(candidate), f2):
                             copies1[candidate] = missing

                 return branch_copies(copies1), branch_copies(copies2), {}
             def _related(f1, f2):
                 """return True if f1 and f2 filectx have a common ancestor
                 Walk back to common ancestor to see if the two files originate
                 from the same file. Since workingfilectx's rev() is None it messes
                 up the integer comparison logic, hence the pre-step check for
                 None (f1 and f2 can only be workingfilectx's initially).
                 """
                 if f1 == f2:
                     return True  # a match
                 g1, g2 = f1.ancestors(), f2.ancestors()
                 try:
                     f1r, f2r = f1.linkrev(), f2.linkrev()
                     if f1r is None:
                         f1 = next(g1)
                     if f2r is None:
                         f2 = next(g2)
                     while True:
                         f1r, f2r = f1.linkrev(), f2.linkrev()
                         if f1r > f2r:
                             f1 = next(g1)
                         elif f2r > f1r:
                             f2 = next(g2)
                         else:  # f1 and f2 point to files in the same linkrev
                             return f1 == f2  # true if they point to the same file
                 except StopIteration:
                     return False
             def graftcopies(wctx, ctx, base):
                 """Reproduce in wctx the copies made between base and ctx.

                 Unlike mergecopies(), only the copies between base and ctx are
                 considered; copies between base and wctx are ignored. Also unlike
                 mergecopies(), the copies are applied to the working copy instead of
                 being returned. That makes this function cheaper (especially in the
                 common case of base==ctx.p1()) and useful also when
                 experimental.copytrace=off.

                 merge.update() will have already marked most copies, but it only
                 marks a copy if it thinks the source files are related (see
                 merge._related()), and it does not mark copies of files that weren't
                 modified on the local side. This function adds the copies that were
                 "missed" by merge.update().
                 """
                 grafted = pathcopies(base, ctx)
                 # _filter() is applied to the copies, in place, before marking
                 _filter(wctx.p1(), wctx, grafted)
                 for destination, source in pycompat.iteritems(grafted):
                     wctx[destination].markcopied(source)