##// END OF EJS Templates
copies: split the combination of the copies mapping in its own function...
marmoute -
r44225:421ea577 default
parent child Browse files
Show More
@@ -1,1017 +1,1032 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import os
11 import os
12
12
13 from .i18n import _
13 from .i18n import _
14
14
15
15
16 from .revlogutils.flagutil import REVIDX_SIDEDATA
16 from .revlogutils.flagutil import REVIDX_SIDEDATA
17
17
18 from . import (
18 from . import (
19 error,
19 error,
20 match as matchmod,
20 match as matchmod,
21 node,
21 node,
22 pathutil,
22 pathutil,
23 pycompat,
23 pycompat,
24 util,
24 util,
25 )
25 )
26
26
27 from .revlogutils import sidedata as sidedatamod
27 from .revlogutils import sidedata as sidedatamod
28
28
29 from .utils import stringutil
29 from .utils import stringutil
30
30
31
31
32 def _filter(src, dst, t):
32 def _filter(src, dst, t):
33 """filters out invalid copies after chaining"""
33 """filters out invalid copies after chaining"""
34
34
35 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
35 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
36 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 # in the following table (not including trivial cases). For example, case 2
37 # in the following table (not including trivial cases). For example, case 2
38 # is where a file existed in 'src' and remained under that name in 'mid' and
38 # is where a file existed in 'src' and remained under that name in 'mid' and
39 # then was renamed between 'mid' and 'dst'.
39 # then was renamed between 'mid' and 'dst'.
40 #
40 #
41 # case src mid dst result
41 # case src mid dst result
42 # 1 x y - -
42 # 1 x y - -
43 # 2 x y y x->y
43 # 2 x y y x->y
44 # 3 x y x -
44 # 3 x y x -
45 # 4 x y z x->z
45 # 4 x y z x->z
46 # 5 - x y -
46 # 5 - x y -
47 # 6 x x y x->y
47 # 6 x x y x->y
48 #
48 #
49 # _chain() takes care of chaining the copies in 'a' and 'b', but it
49 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
50 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 # between 5 and 6, so it includes all cases in its result.
51 # between 5 and 6, so it includes all cases in its result.
52 # Cases 1, 3, and 5 are then removed by _filter().
52 # Cases 1, 3, and 5 are then removed by _filter().
53
53
54 for k, v in list(t.items()):
54 for k, v in list(t.items()):
55 # remove copies from files that didn't exist
55 # remove copies from files that didn't exist
56 if v not in src:
56 if v not in src:
57 del t[k]
57 del t[k]
58 # remove criss-crossed copies
58 # remove criss-crossed copies
59 elif k in src and v in dst:
59 elif k in src and v in dst:
60 del t[k]
60 del t[k]
61 # remove copies to files that were then removed
61 # remove copies to files that were then removed
62 elif k not in dst:
62 elif k not in dst:
63 del t[k]
63 del t[k]
64
64
65
65
def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    # Start from a copy of 'prefix'; each suffix entry either collapses a
    # two-step chain (dst <- mid <- src becomes dst <- src) or is kept as-is
    # when 'prefix' knows nothing about its source.
    chained = dict(prefix)
    for dst, mid in pycompat.iteritems(suffix):
        chained[dst] = prefix.get(mid, mid)
    return chained
72
72
73
73
74 def _tracefile(fctx, am, basemf):
74 def _tracefile(fctx, am, basemf):
75 """return file context that is the ancestor of fctx present in ancestor
75 """return file context that is the ancestor of fctx present in ancestor
76 manifest am
76 manifest am
77
77
78 Note: we used to try and stop after a given limit, however checking if that
78 Note: we used to try and stop after a given limit, however checking if that
79 limit is reached turned out to be very expensive. we are better off
79 limit is reached turned out to be very expensive. we are better off
80 disabling that feature."""
80 disabling that feature."""
81
81
82 for f in fctx.ancestors():
82 for f in fctx.ancestors():
83 path = f.path()
83 path = f.path()
84 if am.get(path, None) == f.filenode():
84 if am.get(path, None) == f.filenode():
85 return path
85 return path
86 if basemf and basemf.get(path, None) == f.filenode():
86 if basemf and basemf.get(path, None) == f.filenode():
87 return path
87 return path
88
88
89
89
90 def _dirstatecopies(repo, match=None):
90 def _dirstatecopies(repo, match=None):
91 ds = repo.dirstate
91 ds = repo.dirstate
92 c = ds.copies().copy()
92 c = ds.copies().copy()
93 for k in list(c):
93 for k in list(c):
94 if ds[k] not in b'anm' or (match and not match(k)):
94 if ds[k] not in b'anm' or (match and not match(k)):
95 del c[k]
95 del c[k]
96 return c
96 return c
97
97
98
98
99 def _computeforwardmissing(a, b, match=None):
99 def _computeforwardmissing(a, b, match=None):
100 """Computes which files are in b but not a.
100 """Computes which files are in b but not a.
101 This is its own function so extensions can easily wrap this call to see what
101 This is its own function so extensions can easily wrap this call to see what
102 files _forwardcopies is about to process.
102 files _forwardcopies is about to process.
103 """
103 """
104 ma = a.manifest()
104 ma = a.manifest()
105 mb = b.manifest()
105 mb = b.manifest()
106 return mb.filesnotin(ma, match=match)
106 return mb.filesnotin(ma, match=match)
107
107
108
108
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage for copies implies the changeset-centric algorithms
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    # otherwise honor the experimental read-from configuration
    source = repo.ui.config(b'experimental', b'copies.read-from')
    return source in (b'changeset-only', b'compatibility')
116
116
117
117
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    amf = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    copies = {}

    # Computing the forward missing is quite expensive on large manifests,
    # since it compares the entire manifests. We can optimize it in the common
    # use case of computing what copies are in a commit versus its parent
    # (like during a rebase or histedit). Note, we exclude merge commits from
    # this optimization, since the ctx.files() for a merge commit is not
    # correct for this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    candidates = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(candidates))

    for dst in sorted(candidates):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % dst)
        fctx = b[dst]
        # reuse one ancestry context for all traced files; computing it per
        # file would be much more expensive
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        origin = _tracefile(fctx, amf, basemf)
        if origin:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % origin)
            copies[dst] = origin
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return copies
175
175
176
176
177 def _revinfogetter(repo):
177 def _revinfogetter(repo):
178 """return a function that return multiple data given a <rev>"i
178 """return a function that return multiple data given a <rev>"i
179
179
180 * p1: revision number of first parent
180 * p1: revision number of first parent
181 * p2: revision number of first parent
181 * p2: revision number of first parent
182 * p1copies: mapping of copies from p1
182 * p1copies: mapping of copies from p1
183 * p2copies: mapping of copies from p2
183 * p2copies: mapping of copies from p2
184 * removed: a list of removed files
184 * removed: a list of removed files
185 """
185 """
186 cl = repo.changelog
186 cl = repo.changelog
187 parents = cl.parentrevs
187 parents = cl.parentrevs
188
188
189 if repo.filecopiesmode == b'changeset-sidedata':
189 if repo.filecopiesmode == b'changeset-sidedata':
190 changelogrevision = cl.changelogrevision
190 changelogrevision = cl.changelogrevision
191 flags = cl.flags
191 flags = cl.flags
192
192
193 # A small cache to avoid doing the work twice for merges
193 # A small cache to avoid doing the work twice for merges
194 #
194 #
195 # In the vast majority of cases, if we ask information for a revision
195 # In the vast majority of cases, if we ask information for a revision
196 # about 1 parent, we'll later ask it for the other. So it make sense to
196 # about 1 parent, we'll later ask it for the other. So it make sense to
197 # keep the information around when reaching the first parent of a merge
197 # keep the information around when reaching the first parent of a merge
198 # and dropping it after it was provided for the second parents.
198 # and dropping it after it was provided for the second parents.
199 #
199 #
200 # It exists cases were only one parent of the merge will be walked. It
200 # It exists cases were only one parent of the merge will be walked. It
201 # happens when the "destination" the copy tracing is descendant from a
201 # happens when the "destination" the copy tracing is descendant from a
202 # new root, not common with the "source". In that case, we will only walk
202 # new root, not common with the "source". In that case, we will only walk
203 # through merge parents that are descendant of changesets common
203 # through merge parents that are descendant of changesets common
204 # between "source" and "destination".
204 # between "source" and "destination".
205 #
205 #
206 # With the current case implementation if such changesets have a copy
206 # With the current case implementation if such changesets have a copy
207 # information, we'll keep them in memory until the end of
207 # information, we'll keep them in memory until the end of
208 # _changesetforwardcopies. We don't expect the case to be frequent
208 # _changesetforwardcopies. We don't expect the case to be frequent
209 # enough to matters.
209 # enough to matters.
210 #
210 #
211 # In addition, it would be possible to reach pathological case, were
211 # In addition, it would be possible to reach pathological case, were
212 # many first parent are met before any second parent is reached. In
212 # many first parent are met before any second parent is reached. In
213 # that case the cache could grow. If this even become an issue one can
213 # that case the cache could grow. If this even become an issue one can
214 # safely introduce a maximum cache size. This would trade extra CPU/IO
214 # safely introduce a maximum cache size. This would trade extra CPU/IO
215 # time to save memory.
215 # time to save memory.
216 merge_caches = {}
216 merge_caches = {}
217
217
218 def revinfo(rev):
218 def revinfo(rev):
219 p1, p2 = parents(rev)
219 p1, p2 = parents(rev)
220 if flags(rev) & REVIDX_SIDEDATA:
220 if flags(rev) & REVIDX_SIDEDATA:
221 e = merge_caches.pop(rev, None)
221 e = merge_caches.pop(rev, None)
222 if e is not None:
222 if e is not None:
223 return e
223 return e
224 c = changelogrevision(rev)
224 c = changelogrevision(rev)
225 p1copies = c.p1copies
225 p1copies = c.p1copies
226 p2copies = c.p2copies
226 p2copies = c.p2copies
227 removed = c.filesremoved
227 removed = c.filesremoved
228 if p1 != node.nullrev and p2 != node.nullrev:
228 if p1 != node.nullrev and p2 != node.nullrev:
229 # XXX some case we over cache, IGNORE
229 # XXX some case we over cache, IGNORE
230 merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
230 merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
231 else:
231 else:
232 p1copies = {}
232 p1copies = {}
233 p2copies = {}
233 p2copies = {}
234 removed = []
234 removed = []
235 return p1, p2, p1copies, p2copies, removed
235 return p1, p2, p1copies, p2copies, removed
236
236
237 else:
237 else:
238
238
239 def revinfo(rev):
239 def revinfo(rev):
240 p1, p2 = parents(rev)
240 p1, p2 = parents(rev)
241 ctx = repo[rev]
241 ctx = repo[rev]
242 p1copies, p2copies = ctx._copies
242 p1copies, p2copies = ctx._copies
243 removed = ctx.filesremoved()
243 removed = ctx.filesremoved()
244 return p1, p2, p1copies, p2copies, removed
244 return p1, p2, p1copies, p2copies, removed
245
245
246 return revinfo
246 return revinfo
247
247
248
248
def _changesetforwardcopies(a, b, match):
    """changeset-centric copy tracing from ancestor 'a' down to 'b'"""
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    missingset = set(missingrevs)
    roots = set()
    for rev in missingrevs:
        for prev in cl.parentrevs(rev):
            if prev == node.nullrev:
                continue
            # record the child so copies can later be propagated downward
            children.setdefault(prev, []).append(rev)
            # a parent outside the missing set is a starting point
            if prev not in missingset:
                roots.add(prev)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    # visit only revisions on a path from a root to b, plus the roots
    # themselves, excluding the destination
    iterrevs = set(from_head)
    iterrevs &= missingset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
    return _combinechangesetcopies(
        sorted(iterrevs), children, b.rev(), revinfo, match
    )
286
287
def _combinechangesetcopies(revs, children, targetrev, revinfo, match):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        # pop: once every child of r has been processed, nobody needs this
        # entry anymore
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        # (the index from the original enumerate() was unused; iterate the
        # children directly)
        for c in children[r]:
            p1, p2, p1copies, p2copies, removed = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
                # _chain makes a copies, we can avoid doing so in some
                # simple/linear cases.
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    return all_copies[targetrev]
340
355
341
356
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        return _committedforwardcopies(a, b, base, match)
    # b is the working copy: trace committed copies up to its first parent,
    # then layer the pending dirstate copies on top
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
356
371
357
372
def _backwardrenames(a, b, match):
    """find {src@b: dst@a} rename mapping where b is an ancestor of a"""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    renames = {}
    for dst, src in sorted(pycompat.iteritems(forward)):
        if match and not match(src):
            continue
        # drop pure copies: only keep entries whose source is gone in 'a'
        if src in a:
            continue
        renames[src] = dst
    return renames
378
393
379
394
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: trace copies forward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif anc == y:
        # y is an ancestor of x: trace renames backward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # unrelated: go back to the common ancestor, then forward to y
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if anc.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, anc, match=match),
            _forwardcopies(anc, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
414
429
415
430
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # an empty context, or comparing a context with itself, carries no copies
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # parent -> working dir: the dirstate already holds the answer
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    return _fullcopytracing(repo, c1, c2, base)
494
509
495
510
def _isfullcopytraceable(repo, c1, base):
    """Decide whether the full copytracing algorithm is cheap enough here.

    Returns True when both ``c1`` and ``base`` are mutable (non-public)
    and the number of changesets between them stays strictly below the
    ``experimental.copytrace.sourcecommitlimit`` config value, in which
    case full copytracing gives better results for an acceptable cost.
    """
    # a working-directory context has no revision; fall back to its parent
    if c1.rev() is None:
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        # public changesets involved: assume the range is too expensive
        return False
    limit = repo.ui.configint(
        b'experimental', b'copytrace.sourcecommitlimit'
    )
    nbcommits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
    return nbcommits < limit
514
529
515
530
516 def _checksinglesidecopies(
531 def _checksinglesidecopies(
517 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
532 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
518 ):
533 ):
519 if src not in m2:
534 if src not in m2:
520 # deleted on side 2
535 # deleted on side 2
521 if src not in m1:
536 if src not in m1:
522 # renamed on side 1, deleted on side 2
537 # renamed on side 1, deleted on side 2
523 renamedelete[src] = dsts1
538 renamedelete[src] = dsts1
524 elif m2[src] != mb[src]:
539 elif m2[src] != mb[src]:
525 if not _related(c2[src], base[src]):
540 if not _related(c2[src], base[src]):
526 return
541 return
527 # modified on side 2
542 # modified on side 2
528 for dst in dsts1:
543 for dst in dsts1:
529 if dst not in m2:
544 if dst not in m2:
530 # dst not added on side 2 (handle as regular
545 # dst not added on side 2 (handle as regular
531 # "both created" case in manifestmerge otherwise)
546 # "both created" case in manifestmerge otherwise)
532 copy[dst] = src
547 copy[dst] = src
533
548
534
549
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple ``(copy, movewithdir, diverge, renamedelete, dirmove)``
    with the same meaning as the values returned by ``mergecopies``.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # copies recorded on each side since the merge base, as {dst: src}
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert the mappings so each source lists all its destinations; a
    # single source may have been copied to several destinations
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # flat sets of destinations, only used for the debug output below
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        # no copies at all: nothing more to compute
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b"   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so prefix matching below is exact
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
699
714
700
715
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same 5-tuple shape as ``mergecopies``; the last four
    elements are always empty here.
    """

    # working-directory contexts have no revision; use their parents
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base and c2, bailing out to the
    # full algorithm as soon as a merge commit is encountered
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only the recorded copies whose source still exists on side 1
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index the files added on side 1 by basename and by directory,
        # the two shapes of rename this heuristic can recognize
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
808
823
809
824
810 def _related(f1, f2):
825 def _related(f1, f2):
811 """return True if f1 and f2 filectx have a common ancestor
826 """return True if f1 and f2 filectx have a common ancestor
812
827
813 Walk back to common ancestor to see if the two files originate
828 Walk back to common ancestor to see if the two files originate
814 from the same file. Since workingfilectx's rev() is None it messes
829 from the same file. Since workingfilectx's rev() is None it messes
815 up the integer comparison logic, hence the pre-step check for
830 up the integer comparison logic, hence the pre-step check for
816 None (f1 and f2 can only be workingfilectx's initially).
831 None (f1 and f2 can only be workingfilectx's initially).
817 """
832 """
818
833
819 if f1 == f2:
834 if f1 == f2:
820 return True # a match
835 return True # a match
821
836
822 g1, g2 = f1.ancestors(), f2.ancestors()
837 g1, g2 = f1.ancestors(), f2.ancestors()
823 try:
838 try:
824 f1r, f2r = f1.linkrev(), f2.linkrev()
839 f1r, f2r = f1.linkrev(), f2.linkrev()
825
840
826 if f1r is None:
841 if f1r is None:
827 f1 = next(g1)
842 f1 = next(g1)
828 if f2r is None:
843 if f2r is None:
829 f2 = next(g2)
844 f2 = next(g2)
830
845
831 while True:
846 while True:
832 f1r, f2r = f1.linkrev(), f2.linkrev()
847 f1r, f2r = f1.linkrev(), f2.linkrev()
833 if f1r > f2r:
848 if f1r > f2r:
834 f1 = next(g1)
849 f1 = next(g1)
835 elif f2r > f1r:
850 elif f2r > f1r:
836 f2 = next(g2)
851 f2 = next(g2)
837 else: # f1 and f2 point to files in the same linkrev
852 else: # f1 and f2 point to files in the same linkrev
838 return f1 == f2 # true if they point to the same file
853 return f1 == f2 # true if they point to the same file
839 except StopIteration:
854 except StopIteration:
840 return False
855 return False
841
856
842
857
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    cfg = repo.ui.config(b'experimental', b'copytrace')
    parsed = stringutil.parsebool(cfg)
    # copytracing counts as enabled unless the value explicitly parses
    # as False; the special b'heuristics' value also enables it
    tracingenabled = cfg == b'heuristics' or parsed or parsed is None
    exclude = {}
    if skiprev is not None and tracingenabled:
        # copytrace='off' skips this line, but not the entire function
        # because the line below is O(size of the repo) during a rebase,
        # while the rest of the function is much faster (and is required
        # for carrying copy metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst not in exclude and dst in wctx:
            wctx[dst].markcopied(src)
867
882
868
883
def computechangesetfilesadded(ctx):
    """Return the list of files added in changeset ``ctx``.

    A file counts as added when the changeset touches it and it is
    absent from every parent.
    """
    parents = ctx.parents()
    return [f for f in ctx.files() if not any(f in p for p in parents)]
877
892
878
893
def computechangesetfilesremoved(ctx):
    """Return the list of files removed in changeset ``ctx``.

    A file counts as removed when the changeset lists it as touched
    but it is no longer present in the changeset itself.
    """
    return [f for f in ctx.files() if f not in ctx]
887
902
888
903
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionnary (p1copies, p2copies).

    Each dictionnary are in the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    parent1 = ctx.p1()
    parent2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # skip files outside the narrowspec and files no longer present
        if not narrowmatch(dst) or dst not in ctx:
            continue
        rename = ctx[dst].renamed()
        if rename:
            src, srcnode = rename
            # attribute the copy to whichever parent actually holds the
            # source file at the recorded node
            if src in parent1 and parent1[src].filenode() == srcnode:
                p1copies[dst] = src
            elif src in parent2 and parent2[src].filenode() == srcnode:
                p2copies[dst] = src
    return p1copies, p2copies
913
928
914
929
def encodecopies(files, copies):
    """Serialize a ``{dest: source}`` copy mapping against ``files``.

    Each entry is encoded as ``<index>\\0<source>`` where ``<index>`` is
    the position of the destination inside ``files``; entries are joined
    with newlines.  Raises ``error.ProgrammingError`` when some copy
    destination does not appear in ``files``.
    """
    items = [
        b'%d\0%s' % (idx, copies[dst])
        for idx, dst in enumerate(files)
        if dst in copies
    ]
    # every copy destination must appear in the file list, otherwise the
    # mapping could not be decoded back
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)
925
940
926
941
def decodecopies(files, data):
    """Parse ``data`` produced by ``encodecopies`` back into a dict.

    Returns ``{dest: source}``, or ``None`` when the payload does not
    match the expected encoding.
    """
    if not data:
        return {}
    try:
        copies = {}
        for line in data.split(b'\n'):
            strindex, src = line.split(b'\0')
            copies[files[int(strindex)]] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
942
957
943
958
def encodefileindices(files, subset):
    """Encode the members of ``subset`` as newline-separated decimal
    indices into ``files`` (in ``files`` order)."""
    members = set(subset)
    return b'\n'.join(
        b'%d' % idx for idx, f in enumerate(files) if f in members
    )
951
966
952
967
def decodefileindices(files, data):
    """Decode a newline-separated index list into the matching entries
    of ``files``.

    Returns the selected file names, or ``None`` when the payload is not
    a valid index list for ``files`` (out-of-range index, non-numeric
    text, ...).
    """
    if not data:
        return []
    try:
        subset = []
        for chunk in data.split(b'\n'):
            idx = int(chunk)
            # reject anything outside the file list; negative indices
            # would otherwise silently wrap around in Python
            if not (0 <= idx < len(files)):
                return None
            subset.append(files[idx])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
968
983
969
984
def _getsidedata(srcrepo, rev):
    """Return the copy-related sidedata mapping for revision ``rev``.

    The mapping contains (only for the non-empty pieces) the encoded
    p1/p2 copy information and the encoded added/removed file indices,
    keyed by their ``sidedatamod.SD_*`` constants.
    """
    ctx = srcrepo[rev]
    p1copies, p2copies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    # The previous guard tested `any([filescopies, filesadded,
    # filesremoved])` where `filescopies` was the (p1copies, p2copies)
    # 2-tuple -- a non-empty tuple is always truthy, so the branch always
    # ran.  Test the actual payloads instead so changesets without any
    # copy data skip the sorting/encoding work entirely.
    if p1copies or p2copies or filesadded or filesremoved:
        sortedfiles = sorted(ctx.files())
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
992
1007
993
1008
def getsidedataadder(srcrepo, destrepo):
    """Return a sidedata companion callback that adds copy sidedata.

    The companion is invoked by the upgrade machinery for every revision
    of every revlog; copy sidedata is only computed for the changelog,
    recognized here by its ``filteredrevs`` attribute.
    """

    def sidedatacompanion(revlog, rev):
        # `filteredrevs` only exists on the changelog
        if util.safehasattr(revlog, 'filteredrevs'):
            return False, (), _getsidedata(srcrepo, rev)
        return False, (), {}

    return sidedatacompanion
1002
1017
1003
1018
def getsidedataremover(srcrepo, destrepo):
    """Return a sidedata companion callback that strips copy sidedata.

    For changelog revisions carrying sidedata, the companion asks the
    upgrade machinery to drop every copy-related sidedata key.
    """
    # keys to strip from changelog revisions that carry sidedata
    toremove = (
        sidedatamod.SD_P1COPIES,
        sidedatamod.SD_P2COPIES,
        sidedatamod.SD_FILESADDED,
        sidedatamod.SD_FILESREMOVED,
    )

    def sidedatacompanion(revlog, rev):
        # `filteredrevs` only exists on the changelog
        if util.safehasattr(revlog, 'filteredrevs'):
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                return False, toremove, {}
        return False, (), {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now