copies: do not initialize the dictionary with root in changeset copies...
marmoute
r44177:fb8f10c0 default draft
@@ -1,1014 +1,1017 @@
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import os

from .i18n import _


from .revlogutils.flagutil import REVIDX_SIDEDATA

from . import (
    error,
    match as matchmod,
    node,
    pathutil,
    pycompat,
    util,
)

from .revlogutils import sidedata as sidedatamod

from .utils import stringutil


def _filter(src, dst, t):
    """filters out invalid copies after chaining"""

    # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
    # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
    # in the following table (not including trivial cases). For example, case 2
    # is where a file existed in 'src' and remained under that name in 'mid' and
    # then was renamed between 'mid' and 'dst'.
    #
    # case src mid dst result
    #    1  x   y   -    -
    #    2  x   y   y   x->y
    #    3  x   y   x    -
    #    4  x   y   z   x->z
    #    5  -   x   y    -
    #    6  x   x   y   x->y
    #
    # _chain() takes care of chaining the copies in 'a' and 'b', but it
    # cannot tell the difference between cases 1 and 2, between 3 and 4, or
    # between 5 and 6, so it includes all cases in its result.
    # Cases 1, 3, and 5 are then removed by _filter().

    for k, v in list(t.items()):
        # remove copies from files that didn't exist
        if v not in src:
            del t[k]
        # remove criss-crossed copies
        elif k in src and v in dst:
            del t[k]
        # remove copies to files that were then removed
        elif k not in dst:
            del t[k]


def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    result = prefix.copy()
    for key, value in pycompat.iteritems(suffix):
        result[key] = prefix.get(value, value)
    return result

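A quick illustration of how _chain() and _filter() interact, using plain
dicts (a hypothetical standalone sketch, not part of this module):

    # 'a' was renamed to 'b' between src and mid, then 'b' to 'c' in dst
    prefix = {'b': 'a'}
    suffix = {'c': 'b'}
    result = dict(prefix)
    for key, value in suffix.items():
        result[key] = prefix.get(value, value)
    assert result == {'b': 'a', 'c': 'a'}  # 'c' traces all the way back to 'a'
    # 'b' no longer exists in dst, so _filter() would drop the 'b': 'a' entry
    # (case 1 in the table above)
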
def _tracefile(fctx, am, basemf):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am

    Note: we used to try and stop after a given limit; however, checking if
    that limit is reached turned out to be very expensive. We are better off
    disabling that feature."""

    for f in fctx.ancestors():
        path = f.path()
        if am.get(path, None) == f.filenode():
            return path
        if basemf and basemf.get(path, None) == f.filenode():
            return path


def _dirstatecopies(repo, match=None):
    ds = repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in b'anm' or (match and not match(k)):
            del c[k]
    return c


def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)


def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource


def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm


def _revinfogetter(repo):
    """return a function that returns the following data given a <rev>:

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    """
    cl = repo.changelog
    parents = cl.parentrevs

    if repo.filecopiesmode == b'changeset-sidedata':
        changelogrevision = cl.changelogrevision
        flags = cl.flags

        # A small cache to avoid doing the work twice for merges
        #
        # In the vast majority of cases, if we ask information for a revision
        # about 1 parent, we'll later ask it for the other. So it makes sense
        # to keep the information around when reaching the first parent of a
        # merge and dropping it after it was provided for the second parent.
        #
        # There are cases where only one parent of the merge will be walked. It
        # happens when the "destination" of the copy tracing is descended from
        # a new root, not common with the "source". In that case, we will only
        # walk through merge parents that are descendants of changesets common
        # between "source" and "destination".
        #
        # With the current implementation, if such changesets have copy
        # information, we'll keep them in memory until the end of
        # _changesetforwardcopies. We don't expect the case to be frequent
        # enough to matter.
        #
        # In addition, it would be possible to reach a pathological case, where
        # many first parents are met before any second parent is reached. In
        # that case the cache could grow. If this ever becomes an issue one can
        # safely introduce a maximum cache size. This would trade extra CPU/IO
        # time to save memory.
        merge_caches = {}

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & REVIDX_SIDEDATA:
                e = merge_caches.pop(rev, None)
                if e is not None:
                    return e
                c = changelogrevision(rev)
                p1copies = c.p1copies
                p2copies = c.p2copies
                removed = c.filesremoved
                if p1 != node.nullrev and p2 != node.nullrev:
                    # XXX in some cases we over-cache, IGNORE
                    merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
            else:
                p1copies = {}
                p2copies = {}
                removed = []
            return p1, p2, p1copies, p2copies, removed

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            ctx = repo[rev]
            p1copies, p2copies = ctx._copies
            removed = ctx.filesremoved()
            return p1, p2, p1copies, p2copies, removed

    return revinfo

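The merge_caches dance above is worth spelling out: revinfo() caches the
decoded sidedata the first time a merge is reached (from one parent) and
pop()s it when the merge is reached again from the other parent, so each
merge is decoded at most once. A minimal sketch of that pop-once pattern,
with a hypothetical compute() standing in for changelogrevision():

    cache = {}
    calls = []

    def compute(rev):
        calls.append(rev)
        return b'data-for-%d' % rev

    def info(rev, is_merge):
        e = cache.pop(rev, None)  # a second lookup consumes the cached entry
        if e is not None:
            return e
        e = compute(rev)
        if is_merge:
            cache[rev] = e  # keep it around for the other parent's walk
        return e

    info(5, is_merge=True)  # reached via the first parent: computed
    info(5, is_merge=True)  # reached via the second parent: served and evicted
    assert calls == [5] and cache == {}
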
def _changesetforwardcopies(a, b, match):
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
-    all_copies = {r: {} for r in roots}
+    all_copies = {}
     alwaysmatch = match.always()
     for r in sorted(iterrevs):
-        copies = all_copies.pop(r)
+        copies = all_copies.pop(r, None)
+        if copies is None:
+            # this is a root
+            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
                # _chain makes a copy; we could avoid doing so in some
                # simple/linear cases.
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. When there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 takes precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regard to
                # potential filelog-related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    return all_copies[b.rev()]

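This is the change the patch makes: all_copies used to be pre-seeded with an
empty dict for every root ("{r: {} for r in roots}"); it now starts empty,
and a miss from pop(r, None) identifies a root lazily. A stripped-down
comparison with hypothetical data (plain ints and dicts, not Mercurial
objects):

    roots = {0, 1}

    # before: every root got a dict up front, so pop() always succeeded
    all_copies = {r: {} for r in roots}
    all_copies[2] = {'b': 'a'}  # entry propagated from a parent
    assert all_copies.pop(0) == {}

    # after: the dict starts empty and a miss marks the revision as a root
    all_copies = {2: {'b': 'a'}}
    copies = all_copies.pop(0, None)
    if copies is None:
        # this is a root
        copies = {}
    assert copies == {}
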
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies


def _backwardrenames(a, b, match):
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r


def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies


def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source, present in one context but not the other,
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so in such cases we keep copytracing enabled
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)


def _isfullcopytraceable(repo, c1, base):
    """Checks whether base, source and destination are all non-public; if so,
    the full copytrace algorithm can be used for increased capabilities since
    it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    the number of changesets from c1 to base such that if the number of
    changesets is more than the limit, the full copytracing algorithm won't
    be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint(
            b'experimental', b'copytrace.sourcecommitlimit'
        )
        commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False

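Both limits mentioned around here are ordinary [experimental] config knobs.
A hedged hgrc sketch (movecandidateslimit's default of 100 is what
_heuristicscopytracing()'s docstring below states; the sourcecommitlimit
value here is illustrative, the real default lives in Mercurial's
configitems):

    [experimental]
    copytrace = heuristics
    # limit checked by _isfullcopytraceable() above
    copytrace.sourcecommitlimit = 100
    # cap on rename candidates compared per missing file
    copytrace.movecandidateslimit = 100
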
def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif m2[src] != mb[src]:
        if not _related(c2[src], base[src]):
            return
        # modified on side 2
        for dst in dsts1:
            if dst not in m2:
                # dst not added on side 2 (handle as regular
                # "both created" case in manifestmerge otherwise)
                copy[dst] = src


def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm: it finds all the new files that were
    added from merge base up to the top commit and, for each file, checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a', the common destination 'c' is
                # recorded as a plain copy and no divergence is flagged.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b" pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove

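A concrete illustration of the copy-versus-diverge decision implemented
above, with hypothetical paths and plain dicts standing in for the inverted
copy maps:

    inversecopies1 = {'a': ['b', 'c']}  # side 1: a -> b and a -> c, a deleted
    inversecopies2 = {'a': ['c', 'd']}  # side 2: a -> c and a -> d, a deleted

    copy, diverge = {}, {}
    src = 'a'
    dsts1 = set(inversecopies1[src])
    dsts2 = set(inversecopies2[src])
    if dsts1 & dsts2:
        for dst in dsts1 & dsts2:
            copy[dst] = src
    else:
        diverge[src] = sorted(dsts1 | dsts2)

    assert copy == {'c': 'a'} and diverge == {}
    # with disjoint destinations, e.g. {'b'} and {'d'}, the same code would
    # instead record diverge == {'a': ['b', 'd']}
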
698 def _heuristicscopytracing(repo, c1, c2, base):
701 def _heuristicscopytracing(repo, c1, c2, base):
699 """ Fast copytracing using filename heuristics
702 """ Fast copytracing using filename heuristics
700
703
701 Assumes that moves or renames are of following two types:
704 Assumes that moves or renames are of following two types:
702
705
703 1) Inside a directory only (same directory name but different filenames)
706 1) Inside a directory only (same directory name but different filenames)
704 2) Move from one directory to another
707 2) Move from one directory to another
705 (same filenames but different directory names)
708 (same filenames but different directory names)
706
709
707 Works only when there are no merge commits in the "source branch".
710 Works only when there are no merge commits in the "source branch".
708 Source branch is commits from base up to c2 not including base.
711 Source branch is commits from base up to c2 not including base.
709
712
710 If merge is involved it fallbacks to _fullcopytracing().
713 If merge is involved it fallbacks to _fullcopytracing().
711
714
712 Can be used by setting the following config:
715 Can be used by setting the following config:
713
716
714 [experimental]
717 [experimental]
715 copytrace = heuristics
718 copytrace = heuristics
716
719
717 In some cases the copy/move candidates found by heuristics can be very large
720 In some cases the copy/move candidates found by heuristics can be very large
718 in number and that will make the algorithm slow. The number of possible
721 in number and that will make the algorithm slow. The number of possible
719 candidates to check can be limited by using the config
722 candidates to check can be limited by using the config
720 `experimental.copytrace.movecandidateslimit` which defaults to 100.
723 `experimental.copytrace.movecandidateslimit` which defaults to 100.
721 """
724 """
722
725
723 if c1.rev() is None:
726 if c1.rev() is None:
724 c1 = c1.p1()
727 c1 = c1.p1()
725 if c2.rev() is None:
728 if c2.rev() is None:
726 c2 = c2.p1()
729 c2 = c2.p1()
727
730
728 copies = {}
731 copies = {}
729
732
730 changedfiles = set()
733 changedfiles = set()
731 m1 = c1.manifest()
734 m1 = c1.manifest()
732 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
735 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
733 # If base is not in c2 branch, we switch to fullcopytracing
736 # If base is not in c2 branch, we switch to fullcopytracing
734 repo.ui.debug(
737 repo.ui.debug(
735 b"switching to full copytracing as base is not "
738 b"switching to full copytracing as base is not "
736 b"an ancestor of c2\n"
739 b"an ancestor of c2\n"
737 )
740 )
738 return _fullcopytracing(repo, c1, c2, base)
741 return _fullcopytracing(repo, c1, c2, base)
739
742
740 ctx = c2
743 ctx = c2
741 while ctx != base:
744 while ctx != base:
742 if len(ctx.parents()) == 2:
745 if len(ctx.parents()) == 2:
743 # To keep things simple let's not handle merges
746 # To keep things simple let's not handle merges
744 repo.ui.debug(b"switching to full copytracing because of merges\n")
747 repo.ui.debug(b"switching to full copytracing because of merges\n")
745 return _fullcopytracing(repo, c1, c2, base)
748 return _fullcopytracing(repo, c1, c2, base)
746 changedfiles.update(ctx.files())
749 changedfiles.update(ctx.files())
747 ctx = ctx.p1()
750 ctx = ctx.p1()
748
751
749 cp = _forwardcopies(base, c2)
752 cp = _forwardcopies(base, c2)
750 for dst, src in pycompat.iteritems(cp):
753 for dst, src in pycompat.iteritems(cp):
751 if src in m1:
754 if src in m1:
752 copies[dst] = src
755 copies[dst] = src
753
756
754 # file is missing if it isn't present in the destination, but is present in
757 # file is missing if it isn't present in the destination, but is present in
755 # the base and present in the source.
758 # the base and present in the source.
756 # Presence in the base is important to exclude added files, presence in the
759 # Presence in the base is important to exclude added files, presence in the
757 # source is important to exclude removed files.
760 # source is important to exclude removed files.
758 filt = lambda f: f not in m1 and f in base and f in c2
761 filt = lambda f: f not in m1 and f in base and f in c2
759 missingfiles = [f for f in changedfiles if filt(f)]
762 missingfiles = [f for f in changedfiles if filt(f)]
760
763
761 if missingfiles:
764 if missingfiles:
762 basenametofilename = collections.defaultdict(list)
765 basenametofilename = collections.defaultdict(list)
763 dirnametofilename = collections.defaultdict(list)
766 dirnametofilename = collections.defaultdict(list)
764
767
765 for f in m1.filesnotin(base.manifest()):
768 for f in m1.filesnotin(base.manifest()):
766 basename = os.path.basename(f)
769 basename = os.path.basename(f)
767 dirname = os.path.dirname(f)
770 dirname = os.path.dirname(f)
768 basenametofilename[basename].append(f)
771 basenametofilename[basename].append(f)
769 dirnametofilename[dirname].append(f)
772 dirnametofilename[dirname].append(f)
770
773
771 for f in missingfiles:
774 for f in missingfiles:
772 basename = os.path.basename(f)
775 basename = os.path.basename(f)
773 dirname = os.path.dirname(f)
776 dirname = os.path.dirname(f)
774 samebasename = basenametofilename[basename]
777 samebasename = basenametofilename[basename]
775 samedirname = dirnametofilename[dirname]
778 samedirname = dirnametofilename[dirname]
776 movecandidates = samebasename + samedirname
779 movecandidates = samebasename + samedirname
777 # f is guaranteed to be present in c2, that's why
780 # f is guaranteed to be present in c2, that's why
778 # c2.filectx(f) won't fail
781 # c2.filectx(f) won't fail
779 f2 = c2.filectx(f)
782 f2 = c2.filectx(f)
780 # we can have a lot of candidates which can slow down the heuristics
783 # we can have a lot of candidates which can slow down the heuristics
781 # config value to limit the number of candidates moves to check
784 # config value to limit the number of candidates moves to check
782 maxcandidates = repo.ui.configint(
785 maxcandidates = repo.ui.configint(
783 b'experimental', b'copytrace.movecandidateslimit'
786 b'experimental', b'copytrace.movecandidateslimit'
784 )
787 )
785
788
786 if len(movecandidates) > maxcandidates:
789 if len(movecandidates) > maxcandidates:
787 repo.ui.status(
790 repo.ui.status(
788 _(
791 _(
789 b"skipping copytracing for '%s', more "
792 b"skipping copytracing for '%s', more "
790 b"candidates than the limit: %d\n"
793 b"candidates than the limit: %d\n"
791 )
794 )
792 % (f, len(movecandidates))
795 % (f, len(movecandidates))
793 )
796 )
794 continue
797 continue
795
798
796 for candidate in movecandidates:
799 for candidate in movecandidates:
797 f1 = c1.filectx(candidate)
800 f1 = c1.filectx(candidate)
798 if _related(f1, f2):
801 if _related(f1, f2):
799 # if there are a few related copies then we'll merge
802 # if there are a few related copies then we'll merge
800 # changes into all of them. This matches the behaviour
803 # changes into all of them. This matches the behaviour
801 # of upstream copytracing
804 # of upstream copytracing
802 copies[candidate] = f
805 copies[candidate] = f
803
806
804 return copies, {}, {}, {}, {}
807 return copies, {}, {}, {}, {}
805
808
806
809
807 def _related(f1, f2):
810 def _related(f1, f2):
808 """return True if f1 and f2 filectx have a common ancestor
811 """return True if f1 and f2 filectx have a common ancestor
809
812
810 Walk back to common ancestor to see if the two files originate
813 Walk back to common ancestor to see if the two files originate
811 from the same file. Since workingfilectx's rev() is None it messes
814 from the same file. Since workingfilectx's rev() is None it messes
812 up the integer comparison logic, hence the pre-step check for
815 up the integer comparison logic, hence the pre-step check for
813 None (f1 and f2 can only be workingfilectx's initially).
816 None (f1 and f2 can only be workingfilectx's initially).
814 """
817 """
815
818
816 if f1 == f2:
819 if f1 == f2:
817 return True # a match
820 return True # a match
818
821
819 g1, g2 = f1.ancestors(), f2.ancestors()
822 g1, g2 = f1.ancestors(), f2.ancestors()
820 try:
823 try:
821 f1r, f2r = f1.linkrev(), f2.linkrev()
824 f1r, f2r = f1.linkrev(), f2.linkrev()
822
825
823 if f1r is None:
826 if f1r is None:
824 f1 = next(g1)
827 f1 = next(g1)
825 if f2r is None:
828 if f2r is None:
826 f2 = next(g2)
829 f2 = next(g2)
827
830
828 while True:
831 while True:
829 f1r, f2r = f1.linkrev(), f2.linkrev()
832 f1r, f2r = f1.linkrev(), f2.linkrev()
830 if f1r > f2r:
833 if f1r > f2r:
831 f1 = next(g1)
834 f1 = next(g1)
832 elif f2r > f1r:
835 elif f2r > f1r:
833 f2 = next(g2)
836 f2 = next(g2)
834 else: # f1 and f2 point to files in the same linkrev
837 else: # f1 and f2 point to files in the same linkrev
835 return f1 == f2 # true if they point to the same file
838 return f1 == f2 # true if they point to the same file
836 except StopIteration:
839 except StopIteration:
837 return False
840 return False
838
841
839
842
840 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
843 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
841 """reproduce copies from fromrev to rev in the dirstate
844 """reproduce copies from fromrev to rev in the dirstate
842
845
843 If skiprev is specified, it's a revision that should be used to
846 If skiprev is specified, it's a revision that should be used to
844 filter copy records. Any copies that occur between fromrev and
847 filter copy records. Any copies that occur between fromrev and
845 skiprev will not be duplicated, even if they appear in the set of
848 skiprev will not be duplicated, even if they appear in the set of
846 copies between fromrev and rev.
849 copies between fromrev and rev.
847 """
850 """
848 exclude = {}
851 exclude = {}
849 ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
852 ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
850 bctrace = stringutil.parsebool(ctraceconfig)
853 bctrace = stringutil.parsebool(ctraceconfig)
851 if skiprev is not None and (
854 if skiprev is not None and (
852 ctraceconfig == b'heuristics' or bctrace or bctrace is None
855 ctraceconfig == b'heuristics' or bctrace or bctrace is None
853 ):
856 ):
854 # copytrace='off' skips this line, but not the entire function because
857 # copytrace='off' skips this line, but not the entire function because
855 # the line below is O(size of the repo) during a rebase, while the rest
858 # the line below is O(size of the repo) during a rebase, while the rest
856 # of the function is much faster (and is required for carrying copy
859 # of the function is much faster (and is required for carrying copy
857 # metadata across the rebase anyway).
860 # metadata across the rebase anyway).
858 exclude = pathcopies(repo[fromrev], repo[skiprev])
861 exclude = pathcopies(repo[fromrev], repo[skiprev])
859 for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
862 for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
860 if dst in exclude:
863 if dst in exclude:
861 continue
864 continue
862 if dst in wctx:
865 if dst in wctx:
863 wctx[dst].markcopied(src)
866 wctx[dst].markcopied(src)
864
867
865
868
866 def computechangesetfilesadded(ctx):
869 def computechangesetfilesadded(ctx):
867 """return the list of files added in a changeset
870 """return the list of files added in a changeset
868 """
871 """
869 added = []
872 added = []
870 for f in ctx.files():
873 for f in ctx.files():
871 if not any(f in p for p in ctx.parents()):
874 if not any(f in p for p in ctx.parents()):
872 added.append(f)
875 added.append(f)
873 return added
876 return added


def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset
    """
    removed = []
    for f in ctx.files():
        if f not in ctx:
            removed.append(f)
    return removed
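
# Worked example (hypothetical changeset, not part of the original module):
# for a changeset whose ctx.files() is [b'a', b'b'], where b'a' exists in the
# changeset but in neither parent, and b'b' exists in a parent but not in the
# changeset:
#
#     computechangesetfilesadded(ctx)    # -> [b'a']
#     computechangesetfilesremoved(ctx)  # -> [b'b']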


def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data is returned as a pair of dictionaries (p1copies, p2copies).

    Each dictionary is of the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies
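
# Shape of the result (hypothetical values): for a changeset that renamed
# b'old' to b'new' relative to its first parent, this returns
#
#     ({b'new': b'old'}, {})
#
# i.e. copies against p1 in the first dictionary and copies against p2 (only
# relevant for merge commits) in the second.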


def encodecopies(files, copies):
    items = []
    for i, dst in enumerate(files):
        if dst in copies:
            items.append(b'%d\0%s' % (i, copies[dst]))
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)
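
# Encoding example (hypothetical values, not part of the original module):
# each copy is stored as the index of the destination within 'files', a NUL
# byte, and the source name, with entries joined by newlines:
#
#     encodecopies([b'a', b'b', b'c'], {b'b': b'a', b'c': b'a'})
#     # -> b'1\x00a\n2\x00a'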


def decodecopies(files, data):
    try:
        copies = {}
        if not data:
            return copies
        for l in data.split(b'\n'):
            strindex, src = l.split(b'\0')
            i = int(strindex)
            dst = files[i]
            copies[dst] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
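
# Decoding mirrors the encoding above (hypothetical values):
#
#     decodecopies([b'a', b'b', b'c'], b'1\x00a\n2\x00a')
#     # -> {b'b': b'a', b'c': b'a'}
#
# Malformed data (e.g. written by an unrelated user of the same key) decodes
# to None rather than raising.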


def encodefileindices(files, subset):
    subset = set(subset)
    indices = []
    for i, f in enumerate(files):
        if f in subset:
            indices.append(b'%d' % i)
    return b'\n'.join(indices)
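
# Example (hypothetical values): the subset is stored as newline-separated
# decimal indices into 'files', in file order:
#
#     encodefileindices([b'a', b'b', b'c'], [b'c', b'a'])
#     # -> b'0\n2'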


def decodefileindices(files, data):
    try:
        subset = []
        if not data:
            return subset
        for strindex in data.split(b'\n'):
            i = int(strindex)
            if i < 0 or i >= len(files):
                return None
            subset.append(files[i])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
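
# The decoder reverses the mapping and validates the indices (hypothetical
# values):
#
#     decodefileindices([b'a', b'b', b'c'], b'0\n2')  # -> [b'a', b'c']
#     decodefileindices([b'a', b'b', b'c'], b'0\n7')  # -> None (out of range)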


def _getsidedata(srcrepo, rev):
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    if any([filescopies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies, p2copies = filescopies
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
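
# Resulting shape (hypothetical values): for a revision that only renamed
# b'old' to b'new' against p1 (so sortedfiles == [b'new', b'old']), the
# returned mapping would be roughly
#
#     {
#         sidedatamod.SD_P1COPIES: b'0\x00old',
#         sidedatamod.SD_FILESADDED: b'0',
#         sidedatamod.SD_FILESREMOVED: b'1',
#     }
#
# using the index-based encodings defined above.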


def getsidedataadder(srcrepo, destrepo):
    def sidedatacompanion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion
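
# The companion protocol, as used here: for each revision the callback
# returns a (removeflag, removedkeys, addedsidedata) triple. This adder never
# strips anything, hence the constant (False, ()) prefix, and only computes
# copy sidedata for the changelog (identified by its 'filteredrevs'
# attribute), leaving filelogs and manifests untouched.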


def getsidedataremover(srcrepo, destrepo):
    def sidedatacompanion(revlog, rev):
        f = ()
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                f = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, f, {}

    return sidedatacompanion
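
# Conversely, the remover returns (False, keys, {}), where 'keys' names the
# four copy-related sidedata categories to strip from changelog revisions
# that actually carry sidedata (REVIDX_SIDEDATA flag set); all other revlogs
# pass through unchanged.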