##// END OF EJS Templates
sidedatacopies: only fetch information once for merge...
marmoute -
r43595:90213d02 default
parent child Browse files
Show More
@@ -1,986 +1,1017 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import os
11 import os
12
12
13 from .i18n import _
13 from .i18n import _
14
14
15
15
16 from .revlogutils.flagutil import REVIDX_SIDEDATA
16 from .revlogutils.flagutil import REVIDX_SIDEDATA
17
17
18 from . import (
18 from . import (
19 error,
19 error,
20 match as matchmod,
20 match as matchmod,
21 node,
21 node,
22 pathutil,
22 pathutil,
23 pycompat,
23 pycompat,
24 util,
24 util,
25 )
25 )
26
26
27 from .revlogutils import sidedata as sidedatamod
27 from .revlogutils import sidedata as sidedatamod
28
28
29 from .utils import stringutil
29 from .utils import stringutil
30
30
31
31
32 def _filter(src, dst, t):
32 def _filter(src, dst, t):
33 """filters out invalid copies after chaining"""
33 """filters out invalid copies after chaining"""
34
34
35 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
35 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
36 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 # in the following table (not including trivial cases). For example, case 2
37 # in the following table (not including trivial cases). For example, case 2
38 # is where a file existed in 'src' and remained under that name in 'mid' and
38 # is where a file existed in 'src' and remained under that name in 'mid' and
39 # then was renamed between 'mid' and 'dst'.
39 # then was renamed between 'mid' and 'dst'.
40 #
40 #
41 # case src mid dst result
41 # case src mid dst result
42 # 1 x y - -
42 # 1 x y - -
43 # 2 x y y x->y
43 # 2 x y y x->y
44 # 3 x y x -
44 # 3 x y x -
45 # 4 x y z x->z
45 # 4 x y z x->z
46 # 5 - x y -
46 # 5 - x y -
47 # 6 x x y x->y
47 # 6 x x y x->y
48 #
48 #
49 # _chain() takes care of chaining the copies in 'a' and 'b', but it
49 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
50 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 # between 5 and 6, so it includes all cases in its result.
51 # between 5 and 6, so it includes all cases in its result.
52 # Cases 1, 3, and 5 are then removed by _filter().
52 # Cases 1, 3, and 5 are then removed by _filter().
53
53
54 for k, v in list(t.items()):
54 for k, v in list(t.items()):
55 # remove copies from files that didn't exist
55 # remove copies from files that didn't exist
56 if v not in src:
56 if v not in src:
57 del t[k]
57 del t[k]
58 # remove criss-crossed copies
58 # remove criss-crossed copies
59 elif k in src and v in dst:
59 elif k in src and v in dst:
60 del t[k]
60 del t[k]
61 # remove copies to files that were then removed
61 # remove copies to files that were then removed
62 elif k not in dst:
62 elif k not in dst:
63 del t[k]
63 del t[k]
64
64
65
65
66 def _chain(a, b):
66 def _chain(a, b):
67 """chain two sets of copies 'a' and 'b'"""
67 """chain two sets of copies 'a' and 'b'"""
68 t = a.copy()
68 t = a.copy()
69 for k, v in pycompat.iteritems(b):
69 for k, v in pycompat.iteritems(b):
70 if v in t:
70 if v in t:
71 t[k] = t[v]
71 t[k] = t[v]
72 else:
72 else:
73 t[k] = v
73 t[k] = v
74 return t
74 return t
75
75
76
76
77 def _tracefile(fctx, am, basemf):
77 def _tracefile(fctx, am, basemf):
78 """return file context that is the ancestor of fctx present in ancestor
78 """return file context that is the ancestor of fctx present in ancestor
79 manifest am
79 manifest am
80
80
81 Note: we used to try and stop after a given limit, however checking if that
81 Note: we used to try and stop after a given limit, however checking if that
82 limit is reached turned out to be very expensive. we are better off
82 limit is reached turned out to be very expensive. we are better off
83 disabling that feature."""
83 disabling that feature."""
84
84
85 for f in fctx.ancestors():
85 for f in fctx.ancestors():
86 path = f.path()
86 path = f.path()
87 if am.get(path, None) == f.filenode():
87 if am.get(path, None) == f.filenode():
88 return path
88 return path
89 if basemf and basemf.get(path, None) == f.filenode():
89 if basemf and basemf.get(path, None) == f.filenode():
90 return path
90 return path
91
91
92
92
93 def _dirstatecopies(repo, match=None):
93 def _dirstatecopies(repo, match=None):
94 ds = repo.dirstate
94 ds = repo.dirstate
95 c = ds.copies().copy()
95 c = ds.copies().copy()
96 for k in list(c):
96 for k in list(c):
97 if ds[k] not in b'anm' or (match and not match(k)):
97 if ds[k] not in b'anm' or (match and not match(k)):
98 del c[k]
98 del c[k]
99 return c
99 return c
100
100
101
101
102 def _computeforwardmissing(a, b, match=None):
102 def _computeforwardmissing(a, b, match=None):
103 """Computes which files are in b but not a.
103 """Computes which files are in b but not a.
104 This is its own function so extensions can easily wrap this call to see what
104 This is its own function so extensions can easily wrap this call to see what
105 files _forwardcopies is about to process.
105 files _forwardcopies is about to process.
106 """
106 """
107 ma = a.manifest()
107 ma = a.manifest()
108 mb = b.manifest()
108 mb = b.manifest()
109 return mb.filesnotin(ma, match=match)
109 return mb.filesnotin(ma, match=match)
110
110
111
111
112 def usechangesetcentricalgo(repo):
112 def usechangesetcentricalgo(repo):
113 """Checks if we should use changeset-centric copy algorithms"""
113 """Checks if we should use changeset-centric copy algorithms"""
114 if repo.filecopiesmode == b'changeset-sidedata':
114 if repo.filecopiesmode == b'changeset-sidedata':
115 return True
115 return True
116 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
116 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
117 changesetsource = (b'changeset-only', b'compatibility')
117 changesetsource = (b'changeset-only', b'compatibility')
118 return readfrom in changesetsource
118 return readfrom in changesetsource
119
119
120
120
121 def _committedforwardcopies(a, b, base, match):
121 def _committedforwardcopies(a, b, base, match):
122 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
122 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
123 # files might have to be traced back to the fctx parent of the last
123 # files might have to be traced back to the fctx parent of the last
124 # one-side-only changeset, but not further back than that
124 # one-side-only changeset, but not further back than that
125 repo = a._repo
125 repo = a._repo
126
126
127 if usechangesetcentricalgo(repo):
127 if usechangesetcentricalgo(repo):
128 return _changesetforwardcopies(a, b, match)
128 return _changesetforwardcopies(a, b, match)
129
129
130 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
130 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
131 dbg = repo.ui.debug
131 dbg = repo.ui.debug
132 if debug:
132 if debug:
133 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
133 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
134 am = a.manifest()
134 am = a.manifest()
135 basemf = None if base is None else base.manifest()
135 basemf = None if base is None else base.manifest()
136
136
137 # find where new files came from
137 # find where new files came from
138 # we currently don't try to find where old files went, too expensive
138 # we currently don't try to find where old files went, too expensive
139 # this means we can miss a case like 'hg rm b; hg cp a b'
139 # this means we can miss a case like 'hg rm b; hg cp a b'
140 cm = {}
140 cm = {}
141
141
142 # Computing the forward missing is quite expensive on large manifests, since
142 # Computing the forward missing is quite expensive on large manifests, since
143 # it compares the entire manifests. We can optimize it in the common use
143 # it compares the entire manifests. We can optimize it in the common use
144 # case of computing what copies are in a commit versus its parent (like
144 # case of computing what copies are in a commit versus its parent (like
145 # during a rebase or histedit). Note, we exclude merge commits from this
145 # during a rebase or histedit). Note, we exclude merge commits from this
146 # optimization, since the ctx.files() for a merge commit is not correct for
146 # optimization, since the ctx.files() for a merge commit is not correct for
147 # this comparison.
147 # this comparison.
148 forwardmissingmatch = match
148 forwardmissingmatch = match
149 if b.p1() == a and b.p2().node() == node.nullid:
149 if b.p1() == a and b.p2().node() == node.nullid:
150 filesmatcher = matchmod.exact(b.files())
150 filesmatcher = matchmod.exact(b.files())
151 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
151 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
152 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
152 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
153
153
154 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
154 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
155
155
156 if debug:
156 if debug:
157 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
157 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
158
158
159 for f in sorted(missing):
159 for f in sorted(missing):
160 if debug:
160 if debug:
161 dbg(b'debug.copies: tracing file: %s\n' % f)
161 dbg(b'debug.copies: tracing file: %s\n' % f)
162 fctx = b[f]
162 fctx = b[f]
163 fctx._ancestrycontext = ancestrycontext
163 fctx._ancestrycontext = ancestrycontext
164
164
165 if debug:
165 if debug:
166 start = util.timer()
166 start = util.timer()
167 opath = _tracefile(fctx, am, basemf)
167 opath = _tracefile(fctx, am, basemf)
168 if opath:
168 if opath:
169 if debug:
169 if debug:
170 dbg(b'debug.copies: rename of: %s\n' % opath)
170 dbg(b'debug.copies: rename of: %s\n' % opath)
171 cm[f] = opath
171 cm[f] = opath
172 if debug:
172 if debug:
173 dbg(
173 dbg(
174 b'debug.copies: time: %f seconds\n'
174 b'debug.copies: time: %f seconds\n'
175 % (util.timer() - start)
175 % (util.timer() - start)
176 )
176 )
177 return cm
177 return cm
178
178
179
179
180 def _revinfogetter(repo):
180 def _revinfogetter(repo):
181 """return a function that return multiple data given a <rev>"i
181 """return a function that return multiple data given a <rev>"i
182
182
183 * p1: revision number of first parent
183 * p1: revision number of first parent
184 * p2: revision number of first parent
184 * p2: revision number of first parent
185 * p1copies: mapping of copies from p1
185 * p1copies: mapping of copies from p1
186 * p2copies: mapping of copies from p2
186 * p2copies: mapping of copies from p2
187 * removed: a list of removed files
187 * removed: a list of removed files
188 """
188 """
189 cl = repo.changelog
189 cl = repo.changelog
190 parents = cl.parentrevs
190 parents = cl.parentrevs
191
191
192 if repo.filecopiesmode == b'changeset-sidedata':
192 if repo.filecopiesmode == b'changeset-sidedata':
193 changelogrevision = cl.changelogrevision
193 changelogrevision = cl.changelogrevision
194 flags = cl.flags
194 flags = cl.flags
195
195
196 # A small cache to avoid doing the work twice for merges
197 #
198 # In the vast majority of cases, if we ask information for a revision
199 # about 1 parent, we'll later ask it for the other. So it make sense to
200 # keep the information around when reaching the first parent of a merge
201 # and dropping it after it was provided for the second parents.
202 #
203 # It exists cases were only one parent of the merge will be walked. It
204 # happens when the "destination" the copy tracing is descendant from a
205 # new root, not common with the "source". In that case, we will only walk
206 # through merge parents that are descendant of changesets common
207 # between "source" and "destination".
208 #
209 # With the current case implementation if such changesets have a copy
210 # information, we'll keep them in memory until the end of
211 # _changesetforwardcopies. We don't expect the case to be frequent
212 # enough to matters.
213 #
214 # In addition, it would be possible to reach pathological case, were
215 # many first parent are met before any second parent is reached. In
216 # that case the cache could grow. If this even become an issue one can
217 # safely introduce a maximum cache size. This would trade extra CPU/IO
218 # time to save memory.
219 merge_caches = {}
220
196 def revinfo(rev):
221 def revinfo(rev):
197 p1, p2 = parents(rev)
222 p1, p2 = parents(rev)
198 if flags(rev) & REVIDX_SIDEDATA:
223 if flags(rev) & REVIDX_SIDEDATA:
224 e = merge_caches.pop(rev, None)
225 if e is not None:
226 return e
199 c = changelogrevision(rev)
227 c = changelogrevision(rev)
200 p1copies = c.p1copies
228 p1copies = c.p1copies
201 p2copies = c.p2copies
229 p2copies = c.p2copies
202 removed = c.filesremoved
230 removed = c.filesremoved
231 if p1 != node.nullrev and p2 != node.nullrev:
232 # XXX some case we over cache, IGNORE
233 merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
203 else:
234 else:
204 p1copies = {}
235 p1copies = {}
205 p2copies = {}
236 p2copies = {}
206 removed = ()
237 removed = ()
207 return p1, p2, p1copies, p2copies, removed
238 return p1, p2, p1copies, p2copies, removed
208
239
209 else:
240 else:
210
241
211 def revinfo(rev):
242 def revinfo(rev):
212 p1, p2 = parents(rev)
243 p1, p2 = parents(rev)
213 ctx = repo[rev]
244 ctx = repo[rev]
214 p1copies, p2copies = ctx._copies
245 p1copies, p2copies = ctx._copies
215 removed = ctx.filesremoved()
246 removed = ctx.filesremoved()
216 return p1, p2, p1copies, p2copies, removed
247 return p1, p2, p1copies, p2copies, removed
217
248
218 return revinfo
249 return revinfo
219
250
220
251
221 def _changesetforwardcopies(a, b, match):
252 def _changesetforwardcopies(a, b, match):
222 if a.rev() in (node.nullrev, b.rev()):
253 if a.rev() in (node.nullrev, b.rev()):
223 return {}
254 return {}
224
255
225 repo = a.repo().unfiltered()
256 repo = a.repo().unfiltered()
226 children = {}
257 children = {}
227 revinfo = _revinfogetter(repo)
258 revinfo = _revinfogetter(repo)
228
259
229 cl = repo.changelog
260 cl = repo.changelog
230 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
261 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
231 mrset = set(missingrevs)
262 mrset = set(missingrevs)
232 roots = set()
263 roots = set()
233 for r in missingrevs:
264 for r in missingrevs:
234 for p in cl.parentrevs(r):
265 for p in cl.parentrevs(r):
235 if p == node.nullrev:
266 if p == node.nullrev:
236 continue
267 continue
237 if p not in children:
268 if p not in children:
238 children[p] = [r]
269 children[p] = [r]
239 else:
270 else:
240 children[p].append(r)
271 children[p].append(r)
241 if p not in mrset:
272 if p not in mrset:
242 roots.add(p)
273 roots.add(p)
243 if not roots:
274 if not roots:
244 # no common revision to track copies from
275 # no common revision to track copies from
245 return {}
276 return {}
246 min_root = min(roots)
277 min_root = min(roots)
247
278
248 from_head = set(
279 from_head = set(
249 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
280 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
250 )
281 )
251
282
252 iterrevs = set(from_head)
283 iterrevs = set(from_head)
253 iterrevs &= mrset
284 iterrevs &= mrset
254 iterrevs.update(roots)
285 iterrevs.update(roots)
255 iterrevs.remove(b.rev())
286 iterrevs.remove(b.rev())
256 all_copies = {r: {} for r in roots}
287 all_copies = {r: {} for r in roots}
257 alwaysmatch = match.always()
288 alwaysmatch = match.always()
258 for r in sorted(iterrevs):
289 for r in sorted(iterrevs):
259 copies = all_copies.pop(r)
290 copies = all_copies.pop(r)
260 for i, c in enumerate(children[r]):
291 for i, c in enumerate(children[r]):
261 p1, p2, p1copies, p2copies, removed = revinfo(c)
292 p1, p2, p1copies, p2copies, removed = revinfo(c)
262 if r == p1:
293 if r == p1:
263 parent = 1
294 parent = 1
264 childcopies = p1copies
295 childcopies = p1copies
265 else:
296 else:
266 assert r == p2
297 assert r == p2
267 parent = 2
298 parent = 2
268 childcopies = p2copies
299 childcopies = p2copies
269 if not alwaysmatch:
300 if not alwaysmatch:
270 childcopies = {
301 childcopies = {
271 dst: src for dst, src in childcopies.items() if match(dst)
302 dst: src for dst, src in childcopies.items() if match(dst)
272 }
303 }
273 newcopies = copies
304 newcopies = copies
274 if childcopies:
305 if childcopies:
275 newcopies = _chain(newcopies, childcopies)
306 newcopies = _chain(newcopies, childcopies)
276 # _chain makes a copies, we can avoid doing so in some
307 # _chain makes a copies, we can avoid doing so in some
277 # simple/linear cases.
308 # simple/linear cases.
278 assert newcopies is not copies
309 assert newcopies is not copies
279 for f in removed:
310 for f in removed:
280 if f in newcopies:
311 if f in newcopies:
281 if newcopies is copies:
312 if newcopies is copies:
282 # copy on write to avoid affecting potential other
313 # copy on write to avoid affecting potential other
283 # branches. when there are no other branches, this
314 # branches. when there are no other branches, this
284 # could be avoided.
315 # could be avoided.
285 newcopies = copies.copy()
316 newcopies = copies.copy()
286 del newcopies[f]
317 del newcopies[f]
287 othercopies = all_copies.get(c)
318 othercopies = all_copies.get(c)
288 if othercopies is None:
319 if othercopies is None:
289 all_copies[c] = newcopies
320 all_copies[c] = newcopies
290 else:
321 else:
291 # we are the second parent to work on c, we need to merge our
322 # we are the second parent to work on c, we need to merge our
292 # work with the other.
323 # work with the other.
293 #
324 #
294 # Unlike when copies are stored in the filelog, we consider
325 # Unlike when copies are stored in the filelog, we consider
295 # it a copy even if the destination already existed on the
326 # it a copy even if the destination already existed on the
296 # other branch. It's simply too expensive to check if the
327 # other branch. It's simply too expensive to check if the
297 # file existed in the manifest.
328 # file existed in the manifest.
298 #
329 #
299 # In case of conflict, parent 1 take precedence over parent 2.
330 # In case of conflict, parent 1 take precedence over parent 2.
300 # This is an arbitrary choice made anew when implementing
331 # This is an arbitrary choice made anew when implementing
301 # changeset based copies. It was made without regards with
332 # changeset based copies. It was made without regards with
302 # potential filelog related behavior.
333 # potential filelog related behavior.
303 if parent == 1:
334 if parent == 1:
304 othercopies.update(newcopies)
335 othercopies.update(newcopies)
305 else:
336 else:
306 newcopies.update(othercopies)
337 newcopies.update(othercopies)
307 all_copies[c] = newcopies
338 all_copies[c] = newcopies
308 return all_copies[b.rev()]
339 return all_copies[b.rev()]
309
340
310
341
311 def _forwardcopies(a, b, base=None, match=None):
342 def _forwardcopies(a, b, base=None, match=None):
312 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
343 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
313
344
314 if base is None:
345 if base is None:
315 base = a
346 base = a
316 match = a.repo().narrowmatch(match)
347 match = a.repo().narrowmatch(match)
317 # check for working copy
348 # check for working copy
318 if b.rev() is None:
349 if b.rev() is None:
319 cm = _committedforwardcopies(a, b.p1(), base, match)
350 cm = _committedforwardcopies(a, b.p1(), base, match)
320 # combine copies from dirstate if necessary
351 # combine copies from dirstate if necessary
321 copies = _chain(cm, _dirstatecopies(b._repo, match))
352 copies = _chain(cm, _dirstatecopies(b._repo, match))
322 else:
353 else:
323 copies = _committedforwardcopies(a, b, base, match)
354 copies = _committedforwardcopies(a, b, base, match)
324 return copies
355 return copies
325
356
326
357
327 def _backwardrenames(a, b, match):
358 def _backwardrenames(a, b, match):
328 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
359 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
329 return {}
360 return {}
330
361
331 # Even though we're not taking copies into account, 1:n rename situations
362 # Even though we're not taking copies into account, 1:n rename situations
332 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
363 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
333 # arbitrarily pick one of the renames.
364 # arbitrarily pick one of the renames.
334 # We don't want to pass in "match" here, since that would filter
365 # We don't want to pass in "match" here, since that would filter
335 # the destination by it. Since we're reversing the copies, we want
366 # the destination by it. Since we're reversing the copies, we want
336 # to filter the source instead.
367 # to filter the source instead.
337 f = _forwardcopies(b, a)
368 f = _forwardcopies(b, a)
338 r = {}
369 r = {}
339 for k, v in sorted(pycompat.iteritems(f)):
370 for k, v in sorted(pycompat.iteritems(f)):
340 if match and not match(v):
371 if match and not match(v):
341 continue
372 continue
342 # remove copies
373 # remove copies
343 if v in a:
374 if v in a:
344 continue
375 continue
345 r[v] = k
376 r[v] = k
346 return r
377 return r
347
378
348
379
349 def pathcopies(x, y, match=None):
380 def pathcopies(x, y, match=None):
350 """find {dst@y: src@x} copy mapping for directed compare"""
381 """find {dst@y: src@x} copy mapping for directed compare"""
351 repo = x._repo
382 repo = x._repo
352 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
383 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
353 if debug:
384 if debug:
354 repo.ui.debug(
385 repo.ui.debug(
355 b'debug.copies: searching copies from %s to %s\n' % (x, y)
386 b'debug.copies: searching copies from %s to %s\n' % (x, y)
356 )
387 )
357 if x == y or not x or not y:
388 if x == y or not x or not y:
358 return {}
389 return {}
359 a = y.ancestor(x)
390 a = y.ancestor(x)
360 if a == x:
391 if a == x:
361 if debug:
392 if debug:
362 repo.ui.debug(b'debug.copies: search mode: forward\n')
393 repo.ui.debug(b'debug.copies: search mode: forward\n')
363 if y.rev() is None and x == y.p1():
394 if y.rev() is None and x == y.p1():
364 # short-circuit to avoid issues with merge states
395 # short-circuit to avoid issues with merge states
365 return _dirstatecopies(repo, match)
396 return _dirstatecopies(repo, match)
366 copies = _forwardcopies(x, y, match=match)
397 copies = _forwardcopies(x, y, match=match)
367 elif a == y:
398 elif a == y:
368 if debug:
399 if debug:
369 repo.ui.debug(b'debug.copies: search mode: backward\n')
400 repo.ui.debug(b'debug.copies: search mode: backward\n')
370 copies = _backwardrenames(x, y, match=match)
401 copies = _backwardrenames(x, y, match=match)
371 else:
402 else:
372 if debug:
403 if debug:
373 repo.ui.debug(b'debug.copies: search mode: combined\n')
404 repo.ui.debug(b'debug.copies: search mode: combined\n')
374 base = None
405 base = None
375 if a.rev() != node.nullrev:
406 if a.rev() != node.nullrev:
376 base = x
407 base = x
377 copies = _chain(
408 copies = _chain(
378 _backwardrenames(x, a, match=match),
409 _backwardrenames(x, a, match=match),
379 _forwardcopies(a, y, base, match=match),
410 _forwardcopies(a, y, base, match=match),
380 )
411 )
381 _filter(x, y, copies)
412 _filter(x, y, copies)
382 return copies
413 return copies
383
414
384
415
385 def mergecopies(repo, c1, c2, base):
416 def mergecopies(repo, c1, c2, base):
386 """
417 """
387 Finds moves and copies between context c1 and c2 that are relevant for
418 Finds moves and copies between context c1 and c2 that are relevant for
388 merging. 'base' will be used as the merge base.
419 merging. 'base' will be used as the merge base.
389
420
390 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
421 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
391 files that were moved/ copied in one merge parent and modified in another.
422 files that were moved/ copied in one merge parent and modified in another.
392 For example:
423 For example:
393
424
394 o ---> 4 another commit
425 o ---> 4 another commit
395 |
426 |
396 | o ---> 3 commit that modifies a.txt
427 | o ---> 3 commit that modifies a.txt
397 | /
428 | /
398 o / ---> 2 commit that moves a.txt to b.txt
429 o / ---> 2 commit that moves a.txt to b.txt
399 |/
430 |/
400 o ---> 1 merge base
431 o ---> 1 merge base
401
432
402 If we try to rebase revision 3 on revision 4, since there is no a.txt in
433 If we try to rebase revision 3 on revision 4, since there is no a.txt in
403 revision 4, and if user have copytrace disabled, we prints the following
434 revision 4, and if user have copytrace disabled, we prints the following
404 message:
435 message:
405
436
406 ```other changed <file> which local deleted```
437 ```other changed <file> which local deleted```
407
438
408 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
439 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
409 "dirmove".
440 "dirmove".
410
441
411 "copy" is a mapping from destination name -> source name,
442 "copy" is a mapping from destination name -> source name,
412 where source is in c1 and destination is in c2 or vice-versa.
443 where source is in c1 and destination is in c2 or vice-versa.
413
444
414 "movewithdir" is a mapping from source name -> destination name,
445 "movewithdir" is a mapping from source name -> destination name,
415 where the file at source present in one context but not the other
446 where the file at source present in one context but not the other
416 needs to be moved to destination by the merge process, because the
447 needs to be moved to destination by the merge process, because the
417 other context moved the directory it is in.
448 other context moved the directory it is in.
418
449
419 "diverge" is a mapping of source name -> list of destination names
450 "diverge" is a mapping of source name -> list of destination names
420 for divergent renames.
451 for divergent renames.
421
452
422 "renamedelete" is a mapping of source name -> list of destination
453 "renamedelete" is a mapping of source name -> list of destination
423 names for files deleted in c1 that were renamed in c2 or vice-versa.
454 names for files deleted in c1 that were renamed in c2 or vice-versa.
424
455
425 "dirmove" is a mapping of detected source dir -> destination dir renames.
456 "dirmove" is a mapping of detected source dir -> destination dir renames.
426 This is needed for handling changes to new files previously grafted into
457 This is needed for handling changes to new files previously grafted into
427 renamed directories.
458 renamed directories.
428
459
429 This function calls different copytracing algorithms based on config.
460 This function calls different copytracing algorithms based on config.
430 """
461 """
431 # avoid silly behavior for update from empty dir
462 # avoid silly behavior for update from empty dir
432 if not c1 or not c2 or c1 == c2:
463 if not c1 or not c2 or c1 == c2:
433 return {}, {}, {}, {}, {}
464 return {}, {}, {}, {}, {}
434
465
435 narrowmatch = c1.repo().narrowmatch()
466 narrowmatch = c1.repo().narrowmatch()
436
467
437 # avoid silly behavior for parent -> working dir
468 # avoid silly behavior for parent -> working dir
438 if c2.node() is None and c1.node() == repo.dirstate.p1():
469 if c2.node() is None and c1.node() == repo.dirstate.p1():
439 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
470 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
440
471
441 copytracing = repo.ui.config(b'experimental', b'copytrace')
472 copytracing = repo.ui.config(b'experimental', b'copytrace')
442 if stringutil.parsebool(copytracing) is False:
473 if stringutil.parsebool(copytracing) is False:
443 # stringutil.parsebool() returns None when it is unable to parse the
474 # stringutil.parsebool() returns None when it is unable to parse the
444 # value, so we should rely on making sure copytracing is on such cases
475 # value, so we should rely on making sure copytracing is on such cases
445 return {}, {}, {}, {}, {}
476 return {}, {}, {}, {}, {}
446
477
447 if usechangesetcentricalgo(repo):
478 if usechangesetcentricalgo(repo):
448 # The heuristics don't make sense when we need changeset-centric algos
479 # The heuristics don't make sense when we need changeset-centric algos
449 return _fullcopytracing(repo, c1, c2, base)
480 return _fullcopytracing(repo, c1, c2, base)
450
481
451 # Copy trace disabling is explicitly below the node == p1 logic above
482 # Copy trace disabling is explicitly below the node == p1 logic above
452 # because the logic above is required for a simple copy to be kept across a
483 # because the logic above is required for a simple copy to be kept across a
453 # rebase.
484 # rebase.
454 if copytracing == b'heuristics':
485 if copytracing == b'heuristics':
455 # Do full copytracing if only non-public revisions are involved as
486 # Do full copytracing if only non-public revisions are involved as
456 # that will be fast enough and will also cover the copies which could
487 # that will be fast enough and will also cover the copies which could
457 # be missed by heuristics
488 # be missed by heuristics
458 if _isfullcopytraceable(repo, c1, base):
489 if _isfullcopytraceable(repo, c1, base):
459 return _fullcopytracing(repo, c1, c2, base)
490 return _fullcopytracing(repo, c1, c2, base)
460 return _heuristicscopytracing(repo, c1, c2, base)
491 return _heuristicscopytracing(repo, c1, c2, base)
461 else:
492 else:
462 return _fullcopytracing(repo, c1, c2, base)
493 return _fullcopytracing(repo, c1, c2, base)
463
494
464
495
def _isfullcopytraceable(repo, c1, base):
    """Decide whether full copytracing is affordable for this merge.

    Returns True when both ``c1`` and ``base`` are mutable (i.e. not
    public) and the number of changesets between them stays below
    ``experimental.copytrace.sourcecommitlimit``; in that situation full
    copytracing is fast enough and finds more copies than the
    heuristics would.
    """
    if c1.rev() is None:
        # working context: judge from its first parent instead
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        # public changesets involved, assume the range may be huge
        return False
    limit = repo.ui.configint(
        b'experimental', b'copytrace.sourcecommitlimit'
    )
    nbcommits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
    return nbcommits < limit
483
514
484
515
485 def _checksinglesidecopies(
516 def _checksinglesidecopies(
486 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
517 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
487 ):
518 ):
488 if src not in m2:
519 if src not in m2:
489 # deleted on side 2
520 # deleted on side 2
490 if src not in m1:
521 if src not in m1:
491 # renamed on side 1, deleted on side 2
522 # renamed on side 1, deleted on side 2
492 renamedelete[src] = dsts1
523 renamedelete[src] = dsts1
493 elif m2[src] != mb[src]:
524 elif m2[src] != mb[src]:
494 if not _related(c2[src], base[src]):
525 if not _related(c2[src], base[src]):
495 return
526 return
496 # modified on side 2
527 # modified on side 2
497 for dst in dsts1:
528 for dst in dsts1:
498 if dst not in m2:
529 if dst not in m2:
499 # dst not added on side 2 (handle as regular
530 # dst not added on side 2 (handle as regular
500 # "both created" case in manifestmerge otherwise)
531 # "both created" case in manifestmerge otherwise)
501 copy[dst] = src
532 copy[dst] = src
502
533
503
534
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple ``(copy, movewithdir, diverge, renamedelete, dirmove)``:
    ``copy`` maps destination to source for copies to merge, ``movewithdir``
    maps files to their new location implied by a directory rename,
    ``diverge`` maps a source to the multiple destinations it was renamed to,
    ``renamedelete`` maps a source renamed on one side and deleted on the
    other to its destinations, and ``dirmove`` maps moved directories.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # per-side {destination: source} maps relative to the merge base
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert them into {source: [destinations]} so we can reason per source
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # flat sets of all destinations involved, only used for debug output
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    # combined {destination: source} map over both sides
    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b"   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so startswith() below matches whole
    # path components only
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
668
699
669
700
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns a 5-tuple in the same shape as _fullcopytracing(), with only
    the first element (the {destination: source} copy map) populated.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (loop-invariant, so read the config only once instead of once per
        # missing file)
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
777
808
778
809
779 def _related(f1, f2):
810 def _related(f1, f2):
780 """return True if f1 and f2 filectx have a common ancestor
811 """return True if f1 and f2 filectx have a common ancestor
781
812
782 Walk back to common ancestor to see if the two files originate
813 Walk back to common ancestor to see if the two files originate
783 from the same file. Since workingfilectx's rev() is None it messes
814 from the same file. Since workingfilectx's rev() is None it messes
784 up the integer comparison logic, hence the pre-step check for
815 up the integer comparison logic, hence the pre-step check for
785 None (f1 and f2 can only be workingfilectx's initially).
816 None (f1 and f2 can only be workingfilectx's initially).
786 """
817 """
787
818
788 if f1 == f2:
819 if f1 == f2:
789 return True # a match
820 return True # a match
790
821
791 g1, g2 = f1.ancestors(), f2.ancestors()
822 g1, g2 = f1.ancestors(), f2.ancestors()
792 try:
823 try:
793 f1r, f2r = f1.linkrev(), f2.linkrev()
824 f1r, f2r = f1.linkrev(), f2.linkrev()
794
825
795 if f1r is None:
826 if f1r is None:
796 f1 = next(g1)
827 f1 = next(g1)
797 if f2r is None:
828 if f2r is None:
798 f2 = next(g2)
829 f2 = next(g2)
799
830
800 while True:
831 while True:
801 f1r, f2r = f1.linkrev(), f2.linkrev()
832 f1r, f2r = f1.linkrev(), f2.linkrev()
802 if f1r > f2r:
833 if f1r > f2r:
803 f1 = next(g1)
834 f1 = next(g1)
804 elif f2r > f1r:
835 elif f2r > f1r:
805 f2 = next(g2)
836 f2 = next(g2)
806 else: # f1 and f2 point to files in the same linkrev
837 else: # f1 and f2 point to files in the same linkrev
807 return f1 == f2 # true if they point to the same file
838 return f1 == f2 # true if they point to the same file
808 except StopIteration:
839 except StopIteration:
809 return False
840 return False
810
841
811
842
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    # copy destinations that must NOT be reproduced in the dirstate
    exclude = {}
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    # the expensive filtering pass runs for every copytrace setting
    # except an explicit boolean "off": 'heuristics', any true value, or
    # an unset/unparseable value (bctrace is None)
    if skiprev is not None and (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    ):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst in exclude:
            continue
        # only mark files that still exist in the working context
        if dst in wctx:
            wctx[dst].markcopied(src)
836
867
837
868
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset
    """
    # a file is "added" when the changeset touches it and no parent of
    # the changeset contains it
    parents = ctx.parents()
    return [f for f in ctx.files() if not any(f in p for p in parents)]
846
877
847
878
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset
    """
    # a file is "removed" when the changeset touches it but it is no
    # longer present in the changeset itself
    return [f for f in ctx.files() if f not in ctx]
856
887
857
888
def computechangesetcopies(ctx):
    """Return the copies data for a changeset.

    The result is a pair of dictionaries ``(p1copies, p2copies)``, each
    of the form ``{newname: oldname}``, describing copies taken from the
    first and second parent respectively.
    """
    copiesfromp1 = {}
    copiesfromp2 = {}
    parent1 = ctx.p1()
    parent2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # skip files outside the narrowspec and files no longer present
        if not narrowmatch(dst) or dst not in ctx:
            continue
        rename = ctx[dst].renamed()
        if not rename:
            continue
        src, srcnode = rename
        # attribute the copy to whichever parent holds the exact source
        # filenode recorded in the rename information
        if src in parent1 and parent1[src].filenode() == srcnode:
            copiesfromp1[dst] = src
        elif src in parent2 and parent2[src].filenode() == srcnode:
            copiesfromp2[dst] = src
    return copiesfromp1, copiesfromp2
882
913
883
914
def encodecopies(files, copies):
    """Encode ``copies`` ({dst: src}) against the ``files`` list.

    Each copy is stored on its own line as the index of the destination
    in ``files`` and the source name, separated by a NUL byte.  Raises
    ProgrammingError when some copy destination is absent from
    ``files``.
    """
    entries = [
        b'%d\0%s' % (index, copies[dst])
        for index, dst in enumerate(files)
        if dst in copies
    ]
    if len(entries) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(entries)
894
925
895
926
def decodecopies(files, data):
    """Decode a copies blob produced by ``encodecopies``.

    ``data`` contains one ``<index> NUL <source>`` entry per line, where
    the index points into ``files``.  Returns a ``{dst: src}`` dict, or
    None when the data cannot be decoded.
    """
    try:
        copies = {}
        if not data:
            return copies
        for l in data.split(b'\n'):
            strindex, src = l.split(b'\0')
            i = int(strindex)
            if i < 0 or i >= len(files):
                # reject out-of-range indices explicitly: a negative
                # index would otherwise silently alias files[-1] etc.
                # (mirrors the validation done in decodefileindices)
                return None
            copies[files[i]] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
911
942
912
943
def encodefileindices(files, subset):
    """Encode the members of ``subset`` as newline-separated indices
    into ``files``, in ``files`` order."""
    members = set(subset)
    return b'\n'.join(
        b'%d' % index
        for index, f in enumerate(files)
        if f in members
    )
920
951
921
952
def decodefileindices(files, data):
    """Decode a newline-separated index blob back into file names.

    Returns the selected entries of ``files`` in encoded order, or None
    when ``data`` is malformed (e.g. another tool reused the same key
    with a different value syntax).
    """
    if not data:
        return []
    try:
        selected = []
        for chunk in data.split(b'\n'):
            index = int(chunk)
            if not (0 <= index < len(files)):
                return None
            selected.append(files[index])
        return selected
    except (ValueError, IndexError):
        return None
937
968
938
969
def _getsidedata(srcrepo, rev):
    """Build the copies-related sidedata mapping for ``rev``.

    Returns a dict from sidedata keys to encoded values, containing only
    the entries that are non-empty for this revision (possibly an empty
    dict).
    """
    ctx = srcrepo[rev]
    p1copies, p2copies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    if not (p1copies or p2copies or filesadded or filesremoved):
        # nothing to record for this revision
        return sidedata
    sortedfiles = sorted(ctx.files())
    encoded = (
        (sidedatamod.SD_P1COPIES, encodecopies(sortedfiles, p1copies)),
        (sidedatamod.SD_P2COPIES, encodecopies(sortedfiles, p2copies)),
        (sidedatamod.SD_FILESADDED, encodefileindices(sortedfiles, filesadded)),
        (
            sidedatamod.SD_FILESREMOVED,
            encodefileindices(sortedfiles, filesremoved),
        ),
    )
    # only keep the entries whose encoded form is non-empty
    for key, value in encoded:
        if value:
            sidedata[key] = value
    return sidedata
961
992
962
993
def getsidedataadder(srcrepo, destrepo):
    """Return a sidedata companion that injects copies sidedata.

    The returned callable computes and attaches the copies-related
    sidedata for changelog revisions and leaves every other revlog
    untouched.
    """

    def sidedatacompanion(revlog, rev):
        # only the changelog has a 'filteredrevs' attribute
        if util.safehasattr(revlog, 'filteredrevs'):
            return False, (), _getsidedata(srcrepo, rev)
        return False, (), {}

    return sidedatacompanion
971
1002
972
1003
def getsidedataremover(srcrepo, destrepo):
    """Return a sidedata companion that strips copies sidedata.

    For changelog revisions that carry sidedata, the returned callable
    requests removal of the four copies-related keys; everything else is
    left untouched.
    """

    def sidedatacompanion(revlog, rev):
        toremove = ()
        # 'filteredrevs' identifies the changelog
        ischangelog = util.safehasattr(revlog, 'filteredrevs')
        if ischangelog and revlog.flags(rev) & REVIDX_SIDEDATA:
            toremove = (
                sidedatamod.SD_P1COPIES,
                sidedatamod.SD_P2COPIES,
                sidedatamod.SD_FILESADDED,
                sidedatamod.SD_FILESREMOVED,
            )
        return False, toremove, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now