upstream/mercurial-mirror Commit - r43469:c16fe77e

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import collections

10

import collections

11

import heapq

11

import heapq

12

import os

12

import os

13

14

from .i18n import _

14

from .i18n import _

15

16

17

from .revlogutils.flagutil import REVIDX_SIDEDATA

17

from .revlogutils.flagutil import REVIDX_SIDEDATA

18

19

from . import (

19

from . import (

20

error,

20

error,

21

match as matchmod,

21

match as matchmod,

22

node,

22

node,

23

pathutil,

23

pathutil,

24

pycompat,

24

pycompat,

25

util,

25

util,

26

)

26

)

27

28

from .revlogutils import sidedata as sidedatamod

28

from .revlogutils import sidedata as sidedatamod

29

30

from .utils import stringutil

30

from .utils import stringutil

31

32

33

def _findlimit(repo, ctxa, ctxb):

33

def _findlimit(repo, ctxa, ctxb):

34

"""

34

"""

35

Find the last revision that needs to be checked to ensure that a full

35

Find the last revision that needs to be checked to ensure that a full

36

transitive closure for file copies can be properly calculated.

36

transitive closure for file copies can be properly calculated.

37

Generally, this means finding the earliest revision number that's an

37

Generally, this means finding the earliest revision number that's an

38

ancestor of a or b but not both, except when a or b is a direct descendent

38

ancestor of a or b but not both, except when a or b is a direct descendent

39

of the other, in which case we can return the minimum revnum of a and b.

39

of the other, in which case we can return the minimum revnum of a and b.

40

"""

40

"""

41

42

# basic idea:

42

# basic idea:

43

# - mark a and b with different sides

43

# - mark a and b with different sides

44

# - if a parent's children are all on the same side, the parent is

44

# - if a parent's children are all on the same side, the parent is

45

# on that side, otherwise it is on no side

45

# on that side, otherwise it is on no side

46

# - walk the graph in topological order with the help of a heap;

46

# - walk the graph in topological order with the help of a heap;

47

# - add unseen parents to side map

47

# - add unseen parents to side map

48

# - clear side of any parent that has children on different sides

48

# - clear side of any parent that has children on different sides

49

# - track number of interesting revs that might still be on a side

49

# - track number of interesting revs that might still be on a side

50

# - track the lowest interesting rev seen

50

# - track the lowest interesting rev seen

51

# - quit when interesting revs is zero

51

# - quit when interesting revs is zero

52

53

cl = repo.changelog

53

cl = repo.changelog

54

wdirparents = None

54

wdirparents = None

55

a = ctxa.rev()

55

a = ctxa.rev()

56

b = ctxb.rev()

56

b = ctxb.rev()

57

if a is None:

57

if a is None:

58

wdirparents = (ctxa.p1(), ctxa.p2())

58

wdirparents = (ctxa.p1(), ctxa.p2())

59

a = node.wdirrev

59

a = node.wdirrev

60

if b is None:

60

if b is None:

61

assert not wdirparents

61

assert not wdirparents

62

wdirparents = (ctxb.p1(), ctxb.p2())

62

wdirparents = (ctxb.p1(), ctxb.p2())

63

b = node.wdirrev

63

b = node.wdirrev

64

65

side = {a: -1, b: 1}

65

side = {a: -1, b: 1}

66

visit = [-a, -b]

66

visit = [-a, -b]

67

heapq.heapify(visit)

67

heapq.heapify(visit)

68

interesting = len(visit)

68

interesting = len(visit)

69

limit = node.wdirrev

69

limit = node.wdirrev

70

71

while interesting:

71

while interesting:

72

r = -(heapq.heappop(visit))

72

r = -(heapq.heappop(visit))

73

if r == node.wdirrev:

73

if r == node.wdirrev:

74

parents = [pctx.rev() for pctx in wdirparents]

74

parents = [pctx.rev() for pctx in wdirparents]

75

else:

75

else:

76

parents = cl.parentrevs(r)

76

parents = cl.parentrevs(r)

77

if parents[1] == node.nullrev:

77

if parents[1] == node.nullrev:

78

parents = parents[:1]

78

parents = parents[:1]

79

for p in parents:

79

for p in parents:

80

if p not in side:

80

if p not in side:

81

# first time we see p; add it to visit

81

# first time we see p; add it to visit

82

side[p] = side[r]

82

side[p] = side[r]

83

if side[p]:

83

if side[p]:

84

interesting += 1

84

interesting += 1

85

heapq.heappush(visit, -p)

85

heapq.heappush(visit, -p)

86

elif side[p] and side[p] != side[r]:

86

elif side[p] and side[p] != side[r]:

87

# p was interesting but now we know better

87

# p was interesting but now we know better

88

side[p] = 0

88

side[p] = 0

89

interesting -= 1

89

interesting -= 1

90

if side[r]:

90

if side[r]:

91

limit = r # lowest rev visited

91

limit = r # lowest rev visited

92

interesting -= 1

92

interesting -= 1

93

94

# Consider the following flow (see test-commit-amend.t under issue4405):

94

# Consider the following flow (see test-commit-amend.t under issue4405):

95

# 1/ File 'a0' committed

95

# 1/ File 'a0' committed

96

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

96

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

97

# 3/ Move back to first commit

97

# 3/ Move back to first commit

98

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

98

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

99

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

99

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

100

#

100

#

101

# During the amend in step five, we will be in this state:

101

# During the amend in step five, we will be in this state:

102

#

102

#

103

# @ 3 temporary amend commit for a1-amend

103

# @ 3 temporary amend commit for a1-amend

104

# |

104

# |

105

# o 2 a1-amend

105

# o 2 a1-amend

106

# |

106

# |

107

# | o 1 a1

107

# | o 1 a1

108

# |/

108

# |/

109

# o 0 a0

109

# o 0 a0

110

#

110

#

111

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

111

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

112

# yet the filelog has the copy information in rev 1 and we will not look

112

# yet the filelog has the copy information in rev 1 and we will not look

113

# back far enough unless we also look at the a and b as candidates.

113

# back far enough unless we also look at the a and b as candidates.

114

# This only occurs when a is a descendent of b or visa-versa.

114

# This only occurs when a is a descendent of b or visa-versa.

115

return min(limit, a, b)

115

return min(limit, a, b)

116

117

118

def _filter(src, dst, t):

118

def _filter(src, dst, t):

119

"""filters out invalid copies after chaining"""

119

"""filters out invalid copies after chaining"""

120

121

# When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')

121

# When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')

122

# with copies in 'b' (from 'mid' to 'dst'), we can get the different cases

122

# with copies in 'b' (from 'mid' to 'dst'), we can get the different cases

123

# in the following table (not including trivial cases). For example, case 2

123

# in the following table (not including trivial cases). For example, case 2

124

# is where a file existed in 'src' and remained under that name in 'mid' and

124

# is where a file existed in 'src' and remained under that name in 'mid' and

125

# then was renamed between 'mid' and 'dst'.

125

# then was renamed between 'mid' and 'dst'.

126

#

126

#

127

# case src mid dst result

127

# case src mid dst result

128

# 1 x y - -

128

# 1 x y - -

129

# 2 x y y x->y

129

# 2 x y y x->y

130

# 3 x y x -

130

# 3 x y x -

131

# 4 x y z x->z

131

# 4 x y z x->z

132

# 5 - x y -

132

# 5 - x y -

133

# 6 x x y x->y

133

# 6 x x y x->y

134

#

134

#

135

# _chain() takes care of chaining the copies in 'a' and 'b', but it

135

# _chain() takes care of chaining the copies in 'a' and 'b', but it

136

# cannot tell the difference between cases 1 and 2, between 3 and 4, or

136

# cannot tell the difference between cases 1 and 2, between 3 and 4, or

137

# between 5 and 6, so it includes all cases in its result.

137

# between 5 and 6, so it includes all cases in its result.

138

# Cases 1, 3, and 5 are then removed by _filter().

138

# Cases 1, 3, and 5 are then removed by _filter().

139

140

for k, v in list(t.items()):

140

for k, v in list(t.items()):

141

# remove copies from files that didn't exist

141

# remove copies from files that didn't exist

142

if v not in src:

142

if v not in src:

143

del t[k]

143

del t[k]

144

# remove criss-crossed copies

144

# remove criss-crossed copies

145

elif k in src and v in dst:

145

elif k in src and v in dst:

146

del t[k]

146

del t[k]

147

# remove copies to files that were then removed

147

# remove copies to files that were then removed

148

elif k not in dst:

148

elif k not in dst:

149

del t[k]

149

del t[k]

150

151

152

def _chain(a, b):

152

def _chain(a, b):

153

"""chain two sets of copies 'a' and 'b'"""

153

"""chain two sets of copies 'a' and 'b'"""

154

t = a.copy()

154

t = a.copy()

155

for k, v in pycompat.iteritems(b):

155

for k, v in pycompat.iteritems(b):

156

if v in t:

156

if v in t:

157

t[k] = t[v]

157

t[k] = t[v]

158

else:

158

else:

159

t[k] = v

159

t[k] = v

160

return t

160

return t

161

162

163

def _tracefile(fctx, am, basemf, limit):

163

def _tracefile(fctx, am, basemf, limit):

164

"""return file context that is the ancestor of fctx present in ancestor

164

"""return file context that is the ancestor of fctx present in ancestor

165

manifest am, stopping after the first ancestor lower than limit"""

165

manifest am

166

167

Note: we used to try and stop after a given limit, however checking if that

168

limit is reached turned out to be very expensive. we are better off

169

disabling that feature."""

166

170

167

for f in fctx.ancestors():

171

for f in fctx.ancestors():

168

path = f.path()

172

path = f.path()

169

if am.get(path, None) == f.filenode():

173

if am.get(path, None) == f.filenode():

170

return path

174

return path

171

if basemf and basemf.get(path, None) == f.filenode():

175

if basemf and basemf.get(path, None) == f.filenode():

172

return path

176

return path

173

if not f.isintroducedafter(limit):

174

return None

175

177

176

178

177

def _dirstatecopies(repo, match=None):

179

def _dirstatecopies(repo, match=None):

178

ds = repo.dirstate

180

ds = repo.dirstate

179

c = ds.copies().copy()

181

c = ds.copies().copy()

180

for k in list(c):

182

for k in list(c):

181

if ds[k] not in b'anm' or (match and not match(k)):

183

if ds[k] not in b'anm' or (match and not match(k)):

182

del c[k]

184

del c[k]

183

return c

185

return c

184

186

185

187

186

def _computeforwardmissing(a, b, match=None):

188

def _computeforwardmissing(a, b, match=None):

187

"""Computes which files are in b but not a.

189

"""Computes which files are in b but not a.

188

This is its own function so extensions can easily wrap this call to see what

190

This is its own function so extensions can easily wrap this call to see what

189

files _forwardcopies is about to process.

191

files _forwardcopies is about to process.

190

"""

192

"""

191

ma = a.manifest()

193

ma = a.manifest()

192

mb = b.manifest()

194

mb = b.manifest()

193

return mb.filesnotin(ma, match=match)

195

return mb.filesnotin(ma, match=match)

194

196

195

197

196

def usechangesetcentricalgo(repo):

198

def usechangesetcentricalgo(repo):

197

"""Checks if we should use changeset-centric copy algorithms"""

199

"""Checks if we should use changeset-centric copy algorithms"""

198

if repo.filecopiesmode == b'changeset-sidedata':

200

if repo.filecopiesmode == b'changeset-sidedata':

199

return True

201

return True

200

readfrom = repo.ui.config(b'experimental', b'copies.read-from')

202

readfrom = repo.ui.config(b'experimental', b'copies.read-from')

201

changesetsource = (b'changeset-only', b'compatibility')

203

changesetsource = (b'changeset-only', b'compatibility')

202

return readfrom in changesetsource

204

return readfrom in changesetsource

203

205

204

206

205

def _committedforwardcopies(a, b, base, match):

207

def _committedforwardcopies(a, b, base, match):

206

"""Like _forwardcopies(), but b.rev() cannot be None (working copy)"""

208

"""Like _forwardcopies(), but b.rev() cannot be None (working copy)"""

207

# files might have to be traced back to the fctx parent of the last

209

# files might have to be traced back to the fctx parent of the last

208

# one-side-only changeset, but not further back than that

210

# one-side-only changeset, but not further back than that

209

repo = a._repo

211

repo = a._repo

210

212

211

if usechangesetcentricalgo(repo):

213

if usechangesetcentricalgo(repo):

212

return _changesetforwardcopies(a, b, match)

214

return _changesetforwardcopies(a, b, match)

213

215

214

debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

216

debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

215

dbg = repo.ui.debug

217

dbg = repo.ui.debug

216

if debug:

218

if debug:

217

dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))

219

dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))

218

limit = _findlimit(repo, a, b)

220

limit = _findlimit(repo, a, b)

219

if debug:

221

if debug:

220

dbg(b'debug.copies: search limit: %d\n' % limit)

222

dbg(b'debug.copies: search limit: %d\n' % limit)

221

am = a.manifest()

223

am = a.manifest()

222

basemf = None if base is None else base.manifest()

224

basemf = None if base is None else base.manifest()

223

225

224

# find where new files came from

226

# find where new files came from

225

# we currently don't try to find where old files went, too expensive

227

# we currently don't try to find where old files went, too expensive

226

# this means we can miss a case like 'hg rm b; hg cp a b'

228

# this means we can miss a case like 'hg rm b; hg cp a b'

227

cm = {}

229

cm = {}

228

230

229

# Computing the forward missing is quite expensive on large manifests, since

231

# Computing the forward missing is quite expensive on large manifests, since

230

# it compares the entire manifests. We can optimize it in the common use

232

# it compares the entire manifests. We can optimize it in the common use

231

# case of computing what copies are in a commit versus its parent (like

233

# case of computing what copies are in a commit versus its parent (like

232

# during a rebase or histedit). Note, we exclude merge commits from this

234

# during a rebase or histedit). Note, we exclude merge commits from this

233

# optimization, since the ctx.files() for a merge commit is not correct for

235

# optimization, since the ctx.files() for a merge commit is not correct for

234

# this comparison.

236

# this comparison.

235

forwardmissingmatch = match

237

forwardmissingmatch = match

236

if b.p1() == a and b.p2().node() == node.nullid:

238

if b.p1() == a and b.p2().node() == node.nullid:

237

filesmatcher = matchmod.exact(b.files())

239

filesmatcher = matchmod.exact(b.files())

238

forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)

240

forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)

239

missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

241

missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

240

242

241

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

243

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

242

244

243

if debug:

245

if debug:

244

dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

246

dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

245

247

246

for f in sorted(missing):

248

for f in sorted(missing):

247

if debug:

249

if debug:

248

dbg(b'debug.copies: tracing file: %s\n' % f)

250

dbg(b'debug.copies: tracing file: %s\n' % f)

249

fctx = b[f]

251

fctx = b[f]

250

fctx._ancestrycontext = ancestrycontext

252

fctx._ancestrycontext = ancestrycontext

251

253

252

if debug:

254

if debug:

253

start = util.timer()

255

start = util.timer()

254

opath = _tracefile(fctx, am, basemf, limit)

256

opath = _tracefile(fctx, am, basemf, limit)

255

if opath:

257

if opath:

256

if debug:

258

if debug:

257

dbg(b'debug.copies: rename of: %s\n' % opath)

259

dbg(b'debug.copies: rename of: %s\n' % opath)

258

cm[f] = opath

260

cm[f] = opath

259

if debug:

261

if debug:

260

dbg(

262

dbg(

261

b'debug.copies: time: %f seconds\n'

263

b'debug.copies: time: %f seconds\n'

262

% (util.timer() - start)

264

% (util.timer() - start)

263

)

265

)

264

return cm

266

return cm

265

267

266

268

267

def _changesetforwardcopies(a, b, match):

269

def _changesetforwardcopies(a, b, match):

268

if a.rev() in (node.nullrev, b.rev()):

270

if a.rev() in (node.nullrev, b.rev()):

269

return {}

271

return {}

270

272

271

repo = a.repo()

273

repo = a.repo()

272

children = {}

274

children = {}

273

cl = repo.changelog

275

cl = repo.changelog

274

missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])

276

missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])

275

for r in missingrevs:

277

for r in missingrevs:

276

for p in cl.parentrevs(r):

278

for p in cl.parentrevs(r):

277

if p == node.nullrev:

279

if p == node.nullrev:

278

continue

280

continue

279

if p not in children:

281

if p not in children:

280

children[p] = [r]

282

children[p] = [r]

281

else:

283

else:

282

children[p].append(r)

284

children[p].append(r)

283

285

284

roots = set(children) - set(missingrevs)

286

roots = set(children) - set(missingrevs)

285

# 'work' contains 3-tuples of a (revision number, parent number, copies).

287

# 'work' contains 3-tuples of a (revision number, parent number, copies).

286

# The parent number is only used for knowing which parent the copies dict

288

# The parent number is only used for knowing which parent the copies dict

287

# came from.

289

# came from.

288

# NOTE: To reduce costly copying the 'copies' dicts, we reuse the same

290

# NOTE: To reduce costly copying the 'copies' dicts, we reuse the same

289

# instance for *one* of the child nodes (the last one). Once an instance

291

# instance for *one* of the child nodes (the last one). Once an instance

290

# has been put on the queue, it is thus no longer safe to modify it.

292

# has been put on the queue, it is thus no longer safe to modify it.

291

# Conversely, it *is* safe to modify an instance popped off the queue.

293

# Conversely, it *is* safe to modify an instance popped off the queue.

292

work = [(r, 1, {}) for r in roots]

294

work = [(r, 1, {}) for r in roots]

293

heapq.heapify(work)

295

heapq.heapify(work)

294

alwaysmatch = match.always()

296

alwaysmatch = match.always()

295

while work:

297

while work:

296

r, i1, copies = heapq.heappop(work)

298

r, i1, copies = heapq.heappop(work)

297

if work and work[0][0] == r:

299

if work and work[0][0] == r:

298

# We are tracing copies from both parents

300

# We are tracing copies from both parents

299

r, i2, copies2 = heapq.heappop(work)

301

r, i2, copies2 = heapq.heappop(work)

300

for dst, src in copies2.items():

302

for dst, src in copies2.items():

301

# Unlike when copies are stored in the filelog, we consider

303

# Unlike when copies are stored in the filelog, we consider

302

# it a copy even if the destination already existed on the

304

# it a copy even if the destination already existed on the

303

# other branch. It's simply too expensive to check if the

305

# other branch. It's simply too expensive to check if the

304

# file existed in the manifest.

306

# file existed in the manifest.

305

if dst not in copies:

307

if dst not in copies:

306

# If it was copied on the p1 side, leave it as copied from

308

# If it was copied on the p1 side, leave it as copied from

307

# that side, even if it was also copied on the p2 side.

309

# that side, even if it was also copied on the p2 side.

308

copies[dst] = copies2[dst]

310

copies[dst] = copies2[dst]

309

if r == b.rev():

311

if r == b.rev():

310

return copies

312

return copies

311

for i, c in enumerate(children[r]):

313

for i, c in enumerate(children[r]):

312

childctx = repo[c]

314

childctx = repo[c]

313

if r == childctx.p1().rev():

315

if r == childctx.p1().rev():

314

parent = 1

316

parent = 1

315

childcopies = childctx.p1copies()

317

childcopies = childctx.p1copies()

316

else:

318

else:

317

assert r == childctx.p2().rev()

319

assert r == childctx.p2().rev()

318

parent = 2

320

parent = 2

319

childcopies = childctx.p2copies()

321

childcopies = childctx.p2copies()

320

if not alwaysmatch:

322

if not alwaysmatch:

321

childcopies = {

323

childcopies = {

322

dst: src for dst, src in childcopies.items() if match(dst)

324

dst: src for dst, src in childcopies.items() if match(dst)

323

}

325

}

324

# Copy the dict only if later iterations will also need it

326

# Copy the dict only if later iterations will also need it

325

if i != len(children[r]) - 1:

327

if i != len(children[r]) - 1:

326

newcopies = copies.copy()

328

newcopies = copies.copy()

327

else:

329

else:

328

newcopies = copies

330

newcopies = copies

329

if childcopies:

331

if childcopies:

330

newcopies = _chain(newcopies, childcopies)

332

newcopies = _chain(newcopies, childcopies)

331

for f in childctx.filesremoved():

333

for f in childctx.filesremoved():

332

if f in newcopies:

334

if f in newcopies:

333

del newcopies[f]

335

del newcopies[f]

334

heapq.heappush(work, (c, parent, newcopies))

336

heapq.heappush(work, (c, parent, newcopies))

335

assert False

337

assert False

336

338

337

339

338

def _forwardcopies(a, b, base=None, match=None):

340

def _forwardcopies(a, b, base=None, match=None):

339

"""find {dst@b: src@a} copy mapping where a is an ancestor of b"""

341

"""find {dst@b: src@a} copy mapping where a is an ancestor of b"""

340

342

341

if base is None:

343

if base is None:

342

base = a

344

base = a

343

match = a.repo().narrowmatch(match)

345

match = a.repo().narrowmatch(match)

344

# check for working copy

346

# check for working copy

345

if b.rev() is None:

347

if b.rev() is None:

346

cm = _committedforwardcopies(a, b.p1(), base, match)

348

cm = _committedforwardcopies(a, b.p1(), base, match)

347

# combine copies from dirstate if necessary

349

# combine copies from dirstate if necessary

348

copies = _chain(cm, _dirstatecopies(b._repo, match))

350

copies = _chain(cm, _dirstatecopies(b._repo, match))

349

else:

351

else:

350

copies = _committedforwardcopies(a, b, base, match)

352

copies = _committedforwardcopies(a, b, base, match)

351

return copies

353

return copies

352

354

353

355

354

def _backwardrenames(a, b, match):

356

def _backwardrenames(a, b, match):

355

if a._repo.ui.config(b'experimental', b'copytrace') == b'off':

357

if a._repo.ui.config(b'experimental', b'copytrace') == b'off':

356

return {}

358

return {}

357

359

358

# Even though we're not taking copies into account, 1:n rename situations

360

# Even though we're not taking copies into account, 1:n rename situations

359

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

361

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

360

# arbitrarily pick one of the renames.

362

# arbitrarily pick one of the renames.

361

# We don't want to pass in "match" here, since that would filter

363

# We don't want to pass in "match" here, since that would filter

362

# the destination by it. Since we're reversing the copies, we want

364

# the destination by it. Since we're reversing the copies, we want

363

# to filter the source instead.

365

# to filter the source instead.

364

f = _forwardcopies(b, a)

366

f = _forwardcopies(b, a)

365

r = {}

367

r = {}

366

for k, v in sorted(pycompat.iteritems(f)):

368

for k, v in sorted(pycompat.iteritems(f)):

367

if match and not match(v):

369

if match and not match(v):

368

continue

370

continue

369

# remove copies

371

# remove copies

370

if v in a:

372

if v in a:

371

continue

373

continue

372

r[v] = k

374

r[v] = k

373

return r

375

return r

374

376

375

377

376

def pathcopies(x, y, match=None):

378

def pathcopies(x, y, match=None):

377

"""find {dst@y: src@x} copy mapping for directed compare"""

379

"""find {dst@y: src@x} copy mapping for directed compare"""

378

repo = x._repo

380

repo = x._repo

379

debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

381

debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

380

if debug:

382

if debug:

381

repo.ui.debug(

383

repo.ui.debug(

382

b'debug.copies: searching copies from %s to %s\n' % (x, y)

384

b'debug.copies: searching copies from %s to %s\n' % (x, y)

383

)

385

)

384

if x == y or not x or not y:

386

if x == y or not x or not y:

385

return {}

387

return {}

386

a = y.ancestor(x)

388

a = y.ancestor(x)

387

if a == x:

389

if a == x:

388

if debug:

390

if debug:

389

repo.ui.debug(b'debug.copies: search mode: forward\n')

391

repo.ui.debug(b'debug.copies: search mode: forward\n')

390

if y.rev() is None and x == y.p1():

392

if y.rev() is None and x == y.p1():

391

# short-circuit to avoid issues with merge states

393

# short-circuit to avoid issues with merge states

392

return _dirstatecopies(repo, match)

394

return _dirstatecopies(repo, match)

393

copies = _forwardcopies(x, y, match=match)

395

copies = _forwardcopies(x, y, match=match)

394

elif a == y:

396

elif a == y:

395

if debug:

397

if debug:

396

repo.ui.debug(b'debug.copies: search mode: backward\n')

398

repo.ui.debug(b'debug.copies: search mode: backward\n')

397

copies = _backwardrenames(x, y, match=match)

399

copies = _backwardrenames(x, y, match=match)

398

else:

400

else:

399

if debug:

401

if debug:

400

repo.ui.debug(b'debug.copies: search mode: combined\n')

402

repo.ui.debug(b'debug.copies: search mode: combined\n')

401

base = None

403

base = None

402

if a.rev() != node.nullrev:

404

if a.rev() != node.nullrev:

403

base = x

405

base = x

404

copies = _chain(

406

copies = _chain(

405

_backwardrenames(x, a, match=match),

407

_backwardrenames(x, a, match=match),

406

_forwardcopies(a, y, base, match=match),

408

_forwardcopies(a, y, base, match=match),

407

)

409

)

408

_filter(x, y, copies)

410

_filter(x, y, copies)

409

return copies

411

return copies

410

412

411

413

412

def mergecopies(repo, c1, c2, base):

414

def mergecopies(repo, c1, c2, base):

413

"""

415

"""

414

Finds moves and copies between context c1 and c2 that are relevant for

416

Finds moves and copies between context c1 and c2 that are relevant for

415

merging. 'base' will be used as the merge base.

417

merging. 'base' will be used as the merge base.

416

418

417

Copytracing is used in commands like rebase, merge, unshelve, etc to merge

419

Copytracing is used in commands like rebase, merge, unshelve, etc to merge

418

files that were moved/ copied in one merge parent and modified in another.

420

files that were moved/ copied in one merge parent and modified in another.

419

For example:

421

For example:

420

422

421

o ---> 4 another commit

423

o ---> 4 another commit

422

|

424

|

423

| o ---> 3 commit that modifies a.txt

425

| o ---> 3 commit that modifies a.txt

424

| /

426

| /

425

o / ---> 2 commit that moves a.txt to b.txt

427

o / ---> 2 commit that moves a.txt to b.txt

426

|/

428

|/

427

o ---> 1 merge base

429

o ---> 1 merge base

428

430

429

If we try to rebase revision 3 on revision 4, since there is no a.txt in

431

If we try to rebase revision 3 on revision 4, since there is no a.txt in

430

revision 4, and if user have copytrace disabled, we prints the following

432

revision 4, and if user have copytrace disabled, we prints the following

431

message:

433

message:

432

434

433

```other changed <file> which local deleted```

435

```other changed <file> which local deleted```

434

436

435

Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and

437

Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and

436

"dirmove".

438

"dirmove".

437

439

438

"copy" is a mapping from destination name -> source name,

440

"copy" is a mapping from destination name -> source name,

439

where source is in c1 and destination is in c2 or vice-versa.

441

where source is in c1 and destination is in c2 or vice-versa.

440

442

441

"movewithdir" is a mapping from source name -> destination name,

443

"movewithdir" is a mapping from source name -> destination name,

442

where the file at source present in one context but not the other

444

where the file at source present in one context but not the other

443

needs to be moved to destination by the merge process, because the

445

needs to be moved to destination by the merge process, because the

444

other context moved the directory it is in.

446

other context moved the directory it is in.

445

447

446

"diverge" is a mapping of source name -> list of destination names

448

"diverge" is a mapping of source name -> list of destination names

447

for divergent renames.

449

for divergent renames.

448

450

449

"renamedelete" is a mapping of source name -> list of destination

451

"renamedelete" is a mapping of source name -> list of destination

450

names for files deleted in c1 that were renamed in c2 or vice-versa.

452

names for files deleted in c1 that were renamed in c2 or vice-versa.

451

453

452

"dirmove" is a mapping of detected source dir -> destination dir renames.

454

"dirmove" is a mapping of detected source dir -> destination dir renames.

453

This is needed for handling changes to new files previously grafted into

455

This is needed for handling changes to new files previously grafted into

454

renamed directories.

456

renamed directories.

455

457

456

This function calls different copytracing algorithms based on config.

458

This function calls different copytracing algorithms based on config.

457

"""

459

"""

458

# avoid silly behavior for update from empty dir

460

# avoid silly behavior for update from empty dir

459

if not c1 or not c2 or c1 == c2:

461

if not c1 or not c2 or c1 == c2:

460

return {}, {}, {}, {}, {}

462

return {}, {}, {}, {}, {}

461

463

462

narrowmatch = c1.repo().narrowmatch()

464

narrowmatch = c1.repo().narrowmatch()

463

465

464

# avoid silly behavior for parent -> working dir

466

# avoid silly behavior for parent -> working dir

465

if c2.node() is None and c1.node() == repo.dirstate.p1():

467

if c2.node() is None and c1.node() == repo.dirstate.p1():

466

return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

468

return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

467

469

468

copytracing = repo.ui.config(b'experimental', b'copytrace')

470

copytracing = repo.ui.config(b'experimental', b'copytrace')

469

if stringutil.parsebool(copytracing) is False:

471

if stringutil.parsebool(copytracing) is False:

470

# stringutil.parsebool() returns None when it is unable to parse the

472

# stringutil.parsebool() returns None when it is unable to parse the

471

# value, so we should rely on making sure copytracing is on such cases

473

# value, so we should rely on making sure copytracing is on such cases

472

return {}, {}, {}, {}, {}

474

return {}, {}, {}, {}, {}

473

475

474

if usechangesetcentricalgo(repo):

476

if usechangesetcentricalgo(repo):

475

# The heuristics don't make sense when we need changeset-centric algos

477

# The heuristics don't make sense when we need changeset-centric algos

476

return _fullcopytracing(repo, c1, c2, base)

478

return _fullcopytracing(repo, c1, c2, base)

477

479

478

# Copy trace disabling is explicitly below the node == p1 logic above

480

# Copy trace disabling is explicitly below the node == p1 logic above

479

# because the logic above is required for a simple copy to be kept across a

481

# because the logic above is required for a simple copy to be kept across a

480

# rebase.

482

# rebase.

481

if copytracing == b'heuristics':

483

if copytracing == b'heuristics':

482

# Do full copytracing if only non-public revisions are involved as

484

# Do full copytracing if only non-public revisions are involved as

483

# that will be fast enough and will also cover the copies which could

485

# that will be fast enough and will also cover the copies which could

484

# be missed by heuristics

486

# be missed by heuristics

485

if _isfullcopytraceable(repo, c1, base):

487

if _isfullcopytraceable(repo, c1, base):

486

return _fullcopytracing(repo, c1, c2, base)

488

return _fullcopytracing(repo, c1, c2, base)

487

return _heuristicscopytracing(repo, c1, c2, base)

489

return _heuristicscopytracing(repo, c1, c2, base)

488

else:

490

else:

489

return _fullcopytracing(repo, c1, c2, base)

491

return _fullcopytracing(repo, c1, c2, base)

490

492

491

493

492

def _isfullcopytraceable(repo, c1, base):

494

def _isfullcopytraceable(repo, c1, base):

493

""" Checks that if base, source and destination are all no-public branches,

495

""" Checks that if base, source and destination are all no-public branches,

494

if yes let's use the full copytrace algorithm for increased capabilities

496

if yes let's use the full copytrace algorithm for increased capabilities

495

since it will be fast enough.

497

since it will be fast enough.

496

498

497

`experimental.copytrace.sourcecommitlimit` can be used to set a limit for

499

`experimental.copytrace.sourcecommitlimit` can be used to set a limit for

498

number of changesets from c1 to base such that if number of changesets are

500

number of changesets from c1 to base such that if number of changesets are

499

more than the limit, full copytracing algorithm won't be used.

501

more than the limit, full copytracing algorithm won't be used.

500

"""

502

"""

501

if c1.rev() is None:

503

if c1.rev() is None:

502

c1 = c1.p1()

504

c1 = c1.p1()

503

if c1.mutable() and base.mutable():

505

if c1.mutable() and base.mutable():

504

sourcecommitlimit = repo.ui.configint(

506

sourcecommitlimit = repo.ui.configint(

505

b'experimental', b'copytrace.sourcecommitlimit'

507

b'experimental', b'copytrace.sourcecommitlimit'

506

)

508

)

507

commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))

509

commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))

508

return commits < sourcecommitlimit

510

return commits < sourcecommitlimit

509

return False

511

return False

510

512

511

513

512

def _checksinglesidecopies(

514

def _checksinglesidecopies(

513

src, dsts1, m1, m2, mb, c2, base, copy, renamedelete

515

src, dsts1, m1, m2, mb, c2, base, copy, renamedelete

514

):

516

):

515

if src not in m2:

517

if src not in m2:

516

# deleted on side 2

518

# deleted on side 2

517

if src not in m1:

519

if src not in m1:

518

# renamed on side 1, deleted on side 2

520

# renamed on side 1, deleted on side 2

519

renamedelete[src] = dsts1

521

renamedelete[src] = dsts1

520

elif m2[src] != mb[src]:

522

elif m2[src] != mb[src]:

521

if not _related(c2[src], base[src]):

523

if not _related(c2[src], base[src]):

522

return

524

return

523

# modified on side 2

525

# modified on side 2

524

for dst in dsts1:

526

for dst in dsts1:

525

if dst not in m2:

527

if dst not in m2:

526

# dst not added on side 2 (handle as regular

528

# dst not added on side 2 (handle as regular

527

# "both created" case in manifestmerge otherwise)

529

# "both created" case in manifestmerge otherwise)

528

copy[dst] = src

530

copy[dst] = src

529

531

530

532

531

def _fullcopytracing(repo, c1, c2, base):

533

def _fullcopytracing(repo, c1, c2, base):

532

""" The full copytracing algorithm which finds all the new files that were

534

""" The full copytracing algorithm which finds all the new files that were

533

added from merge base up to the top commit and for each file it checks if

535

added from merge base up to the top commit and for each file it checks if

534

this file was copied from another file.

536

this file was copied from another file.

535

537

536

This is pretty slow when a lot of changesets are involved but will track all

538

This is pretty slow when a lot of changesets are involved but will track all

537

the copies.

539

the copies.

538

"""

540

"""

539

m1 = c1.manifest()

541

m1 = c1.manifest()

540

m2 = c2.manifest()

542

m2 = c2.manifest()

541

mb = base.manifest()

543

mb = base.manifest()

542

544

543

copies1 = pathcopies(base, c1)

545

copies1 = pathcopies(base, c1)

544

copies2 = pathcopies(base, c2)

546

copies2 = pathcopies(base, c2)

545

547

546

inversecopies1 = {}

548

inversecopies1 = {}

547

inversecopies2 = {}

549

inversecopies2 = {}

548

for dst, src in copies1.items():

550

for dst, src in copies1.items():

549

inversecopies1.setdefault(src, []).append(dst)

551

inversecopies1.setdefault(src, []).append(dst)

550

for dst, src in copies2.items():

552

for dst, src in copies2.items():

551

inversecopies2.setdefault(src, []).append(dst)

553

inversecopies2.setdefault(src, []).append(dst)

552

554

553

copy = {}

555

copy = {}

554

diverge = {}

556

diverge = {}

555

renamedelete = {}

557

renamedelete = {}

556

allsources = set(inversecopies1) | set(inversecopies2)

558

allsources = set(inversecopies1) | set(inversecopies2)

557

for src in allsources:

559

for src in allsources:

558

dsts1 = inversecopies1.get(src)

560

dsts1 = inversecopies1.get(src)

559

dsts2 = inversecopies2.get(src)

561

dsts2 = inversecopies2.get(src)

560

if dsts1 and dsts2:

562

if dsts1 and dsts2:

561

# copied/renamed on both sides

563

# copied/renamed on both sides

562

if src not in m1 and src not in m2:

564

if src not in m1 and src not in m2:

563

# renamed on both sides

565

# renamed on both sides

564

dsts1 = set(dsts1)

566

dsts1 = set(dsts1)

565

dsts2 = set(dsts2)

567

dsts2 = set(dsts2)

566

# If there's some overlap in the rename destinations, we

568

# If there's some overlap in the rename destinations, we

567

# consider it not divergent. For example, if side 1 copies 'a'

569

# consider it not divergent. For example, if side 1 copies 'a'

568

# to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'

570

# to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'

569

# and 'd' and deletes 'a'.

571

# and 'd' and deletes 'a'.

570

if dsts1 & dsts2:

572

if dsts1 & dsts2:

571

for dst in dsts1 & dsts2:

573

for dst in dsts1 & dsts2:

572

copy[dst] = src

574

copy[dst] = src

573

else:

575

else:

574

diverge[src] = sorted(dsts1 | dsts2)

576

diverge[src] = sorted(dsts1 | dsts2)

575

elif src in m1 and src in m2:

577

elif src in m1 and src in m2:

576

# copied on both sides

578

# copied on both sides

577

dsts1 = set(dsts1)

579

dsts1 = set(dsts1)

578

dsts2 = set(dsts2)

580

dsts2 = set(dsts2)

579

for dst in dsts1 & dsts2:

581

for dst in dsts1 & dsts2:

580

copy[dst] = src

582

copy[dst] = src

581

# TODO: Handle cases where it was renamed on one side and copied

583

# TODO: Handle cases where it was renamed on one side and copied

582

# on the other side

584

# on the other side

583

elif dsts1:

585

elif dsts1:

584

# copied/renamed only on side 1

586

# copied/renamed only on side 1

585

_checksinglesidecopies(

587

_checksinglesidecopies(

586

src, dsts1, m1, m2, mb, c2, base, copy, renamedelete

588

src, dsts1, m1, m2, mb, c2, base, copy, renamedelete

587

)

589

)

588

elif dsts2:

590

elif dsts2:

589

# copied/renamed only on side 2

591

# copied/renamed only on side 2

590

_checksinglesidecopies(

592

_checksinglesidecopies(

591

src, dsts2, m2, m1, mb, c1, base, copy, renamedelete

593

src, dsts2, m2, m1, mb, c1, base, copy, renamedelete

592

)

594

)

593

595

594

renamedeleteset = set()

596

renamedeleteset = set()

595

divergeset = set()

597

divergeset = set()

596

for dsts in diverge.values():

598

for dsts in diverge.values():

597

divergeset.update(dsts)

599

divergeset.update(dsts)

598

for dsts in renamedelete.values():

600

for dsts in renamedelete.values():

599

renamedeleteset.update(dsts)

601

renamedeleteset.update(dsts)

600

602

601

# find interesting file sets from manifests

603

# find interesting file sets from manifests

602

addedinm1 = m1.filesnotin(mb, repo.narrowmatch())

604

addedinm1 = m1.filesnotin(mb, repo.narrowmatch())

603

addedinm2 = m2.filesnotin(mb, repo.narrowmatch())

605

addedinm2 = m2.filesnotin(mb, repo.narrowmatch())

604

u1 = sorted(addedinm1 - addedinm2)

606

u1 = sorted(addedinm1 - addedinm2)

605

u2 = sorted(addedinm2 - addedinm1)

607

u2 = sorted(addedinm2 - addedinm1)

606

608

607

header = b" unmatched files in %s"

609

header = b" unmatched files in %s"

608

if u1:

610

if u1:

609

repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))

611

repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))

610

if u2:

612

if u2:

611

repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

613

repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

612

614

613

fullcopy = copies1.copy()

615

fullcopy = copies1.copy()

614

fullcopy.update(copies2)

616

fullcopy.update(copies2)

615

if not fullcopy:

617

if not fullcopy:

616

return copy, {}, diverge, renamedelete, {}

618

return copy, {}, diverge, renamedelete, {}

617

619

618

if repo.ui.debugflag:

620

if repo.ui.debugflag:

619

repo.ui.debug(

621

repo.ui.debug(

620

b" all copies found (* = to merge, ! = divergent, "

622

b" all copies found (* = to merge, ! = divergent, "

621

b"% = renamed and deleted):\n"

623

b"% = renamed and deleted):\n"

622

)

624

)

623

for f in sorted(fullcopy):

625

for f in sorted(fullcopy):

624

note = b""

626

note = b""

625

if f in copy:

627

if f in copy:

626

note += b"*"

628

note += b"*"

627

if f in divergeset:

629

if f in divergeset:

628

note += b"!"

630

note += b"!"

629

if f in renamedeleteset:

631

if f in renamedeleteset:

630

note += b"%"

632

note += b"%"

631

repo.ui.debug(

633

repo.ui.debug(

632

b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)

634

b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)

633

)

635

)

634

del divergeset

636

del divergeset

635

637

636

repo.ui.debug(b" checking for directory renames\n")

638

repo.ui.debug(b" checking for directory renames\n")

637

639

638

# generate a directory move map

640

# generate a directory move map

639

d1, d2 = c1.dirs(), c2.dirs()

641

d1, d2 = c1.dirs(), c2.dirs()

640

invalid = set()

642

invalid = set()

641

dirmove = {}

643

dirmove = {}

642

644

643

# examine each file copy for a potential directory move, which is

645

# examine each file copy for a potential directory move, which is

644

# when all the files in a directory are moved to a new directory

646

# when all the files in a directory are moved to a new directory

645

for dst, src in pycompat.iteritems(fullcopy):

647

for dst, src in pycompat.iteritems(fullcopy):

646

dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)

648

dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)

647

if dsrc in invalid:

649

if dsrc in invalid:

648

# already seen to be uninteresting

650

# already seen to be uninteresting

649

continue

651

continue

650

elif dsrc in d1 and ddst in d1:

652

elif dsrc in d1 and ddst in d1:

651

# directory wasn't entirely moved locally

653

# directory wasn't entirely moved locally

652

invalid.add(dsrc)

654

invalid.add(dsrc)

653

elif dsrc in d2 and ddst in d2:

655

elif dsrc in d2 and ddst in d2:

654

# directory wasn't entirely moved remotely

656

# directory wasn't entirely moved remotely

655

invalid.add(dsrc)

657

invalid.add(dsrc)

656

elif dsrc in dirmove and dirmove[dsrc] != ddst:

658

elif dsrc in dirmove and dirmove[dsrc] != ddst:

657

# files from the same directory moved to two different places

659

# files from the same directory moved to two different places

658

invalid.add(dsrc)

660

invalid.add(dsrc)

659

else:

661

else:

660

# looks good so far

662

# looks good so far

661

dirmove[dsrc] = ddst

663

dirmove[dsrc] = ddst

662

664

663

for i in invalid:

665

for i in invalid:

664

if i in dirmove:

666

if i in dirmove:

665

del dirmove[i]

667

del dirmove[i]

666

del d1, d2, invalid

668

del d1, d2, invalid

667

669

668

if not dirmove:

670

if not dirmove:

669

return copy, {}, diverge, renamedelete, {}

671

return copy, {}, diverge, renamedelete, {}

670

672

671

dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

673

dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

672

674

673

for d in dirmove:

675

for d in dirmove:

674

repo.ui.debug(

676

repo.ui.debug(

675

b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])

677

b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])

676

)

678

)

677

679

678

movewithdir = {}

680

movewithdir = {}

679

# check unaccounted nonoverlapping files against directory moves

681

# check unaccounted nonoverlapping files against directory moves

680

for f in u1 + u2:

682

for f in u1 + u2:

681

if f not in fullcopy:

683

if f not in fullcopy:

682

for d in dirmove:

684

for d in dirmove:

683

if f.startswith(d):

685

if f.startswith(d):

684

# new file added in a directory that was moved, move it

686

# new file added in a directory that was moved, move it

685

df = dirmove[d] + f[len(d) :]

687

df = dirmove[d] + f[len(d) :]

686

if df not in copy:

688

if df not in copy:

687

movewithdir[f] = df

689

movewithdir[f] = df

688

repo.ui.debug(

690

repo.ui.debug(

689

b" pending file src: '%s' -> dst: '%s'\n"

691

b" pending file src: '%s' -> dst: '%s'\n"

690

% (f, df)

692

% (f, df)

691

)

693

)

692

break

694

break

693

695

694

return copy, movewithdir, diverge, renamedelete, dirmove

696

return copy, movewithdir, diverge, renamedelete, dirmove

695

697

696

698

697

def _heuristicscopytracing(repo, c1, c2, base):

699

def _heuristicscopytracing(repo, c1, c2, base):

698

""" Fast copytracing using filename heuristics

700

""" Fast copytracing using filename heuristics

699

701

700

Assumes that moves or renames are of following two types:

702

Assumes that moves or renames are of following two types:

701

703

702

1) Inside a directory only (same directory name but different filenames)

704

1) Inside a directory only (same directory name but different filenames)

703

2) Move from one directory to another

705

2) Move from one directory to another

704

(same filenames but different directory names)

706

(same filenames but different directory names)

705

707

706

Works only when there are no merge commits in the "source branch".

708

Works only when there are no merge commits in the "source branch".

707

Source branch is commits from base up to c2 not including base.

709

Source branch is commits from base up to c2 not including base.

708

710

709

If merge is involved it fallbacks to _fullcopytracing().

711

If merge is involved it fallbacks to _fullcopytracing().

710

712

711

Can be used by setting the following config:

713

Can be used by setting the following config:

712

714

713

[experimental]

715

[experimental]

714

copytrace = heuristics

716

copytrace = heuristics

715

717

716

In some cases the copy/move candidates found by heuristics can be very large

718

In some cases the copy/move candidates found by heuristics can be very large

717

in number and that will make the algorithm slow. The number of possible

719

in number and that will make the algorithm slow. The number of possible

718

candidates to check can be limited by using the config

720

candidates to check can be limited by using the config

719

`experimental.copytrace.movecandidateslimit` which defaults to 100.

721

`experimental.copytrace.movecandidateslimit` which defaults to 100.

720

"""

722

"""

721

723

722

if c1.rev() is None:

724

if c1.rev() is None:

723

c1 = c1.p1()

725

c1 = c1.p1()

724

if c2.rev() is None:

726

if c2.rev() is None:

725

c2 = c2.p1()

727

c2 = c2.p1()

726

728

727

copies = {}

729

copies = {}

728

730

729

changedfiles = set()

731

changedfiles = set()

730

m1 = c1.manifest()

732

m1 = c1.manifest()

731

if not repo.revs(b'%d::%d', base.rev(), c2.rev()):

733

if not repo.revs(b'%d::%d', base.rev(), c2.rev()):

732

# If base is not in c2 branch, we switch to fullcopytracing

734

# If base is not in c2 branch, we switch to fullcopytracing

733

repo.ui.debug(

735

repo.ui.debug(

734

b"switching to full copytracing as base is not "

736

b"switching to full copytracing as base is not "

735

b"an ancestor of c2\n"

737

b"an ancestor of c2\n"

736

)

738

)

737

return _fullcopytracing(repo, c1, c2, base)

739

return _fullcopytracing(repo, c1, c2, base)

738

740

739

ctx = c2

741

ctx = c2

740

while ctx != base:

742

while ctx != base:

741

if len(ctx.parents()) == 2:

743

if len(ctx.parents()) == 2:

742

# To keep things simple let's not handle merges

744

# To keep things simple let's not handle merges

743

repo.ui.debug(b"switching to full copytracing because of merges\n")

745

repo.ui.debug(b"switching to full copytracing because of merges\n")

744

return _fullcopytracing(repo, c1, c2, base)

746

return _fullcopytracing(repo, c1, c2, base)

745

changedfiles.update(ctx.files())

747

changedfiles.update(ctx.files())

746

ctx = ctx.p1()

748

ctx = ctx.p1()

747

749

748

cp = _forwardcopies(base, c2)

750

cp = _forwardcopies(base, c2)

749

for dst, src in pycompat.iteritems(cp):

751

for dst, src in pycompat.iteritems(cp):

750

if src in m1:

752

if src in m1:

751

copies[dst] = src

753

copies[dst] = src

752

754

753

# file is missing if it isn't present in the destination, but is present in

755

# file is missing if it isn't present in the destination, but is present in

754

# the base and present in the source.

756

# the base and present in the source.

755

# Presence in the base is important to exclude added files, presence in the

757

# Presence in the base is important to exclude added files, presence in the

756

# source is important to exclude removed files.

758

# source is important to exclude removed files.

757

filt = lambda f: f not in m1 and f in base and f in c2

759

filt = lambda f: f not in m1 and f in base and f in c2

758

missingfiles = [f for f in changedfiles if filt(f)]

760

missingfiles = [f for f in changedfiles if filt(f)]

759

761

760

if missingfiles:

762

if missingfiles:

761

basenametofilename = collections.defaultdict(list)

763

basenametofilename = collections.defaultdict(list)

762

dirnametofilename = collections.defaultdict(list)

764

dirnametofilename = collections.defaultdict(list)

763

765

764

for f in m1.filesnotin(base.manifest()):

766

for f in m1.filesnotin(base.manifest()):

765

basename = os.path.basename(f)

767

basename = os.path.basename(f)

766

dirname = os.path.dirname(f)

768

dirname = os.path.dirname(f)

767

basenametofilename[basename].append(f)

769

basenametofilename[basename].append(f)

768

dirnametofilename[dirname].append(f)

770

dirnametofilename[dirname].append(f)

769

771

770

for f in missingfiles:

772

for f in missingfiles:

771

basename = os.path.basename(f)

773

basename = os.path.basename(f)

772

dirname = os.path.dirname(f)

774

dirname = os.path.dirname(f)

773

samebasename = basenametofilename[basename]

775

samebasename = basenametofilename[basename]

774

samedirname = dirnametofilename[dirname]

776

samedirname = dirnametofilename[dirname]

775

movecandidates = samebasename + samedirname

777

movecandidates = samebasename + samedirname

776

# f is guaranteed to be present in c2, that's why

778

# f is guaranteed to be present in c2, that's why

777

# c2.filectx(f) won't fail

779

# c2.filectx(f) won't fail

778

f2 = c2.filectx(f)

780

f2 = c2.filectx(f)

779

# we can have a lot of candidates which can slow down the heuristics

781

# we can have a lot of candidates which can slow down the heuristics

780

# config value to limit the number of candidates moves to check

782

# config value to limit the number of candidates moves to check

781

maxcandidates = repo.ui.configint(

783

maxcandidates = repo.ui.configint(

782

b'experimental', b'copytrace.movecandidateslimit'

784

b'experimental', b'copytrace.movecandidateslimit'

783

)

785

)

784

786

785

if len(movecandidates) > maxcandidates:

787

if len(movecandidates) > maxcandidates:

786

repo.ui.status(

788

repo.ui.status(

787

_(

789

_(

788

b"skipping copytracing for '%s', more "

790

b"skipping copytracing for '%s', more "

789

b"candidates than the limit: %d\n"

791

b"candidates than the limit: %d\n"

790

)

792

)

791

% (f, len(movecandidates))

793

% (f, len(movecandidates))

792

)

794

)

793

continue

795

continue

794

796

795

for candidate in movecandidates:

797

for candidate in movecandidates:

796

f1 = c1.filectx(candidate)

798

f1 = c1.filectx(candidate)

797

if _related(f1, f2):

799

if _related(f1, f2):

798

# if there are a few related copies then we'll merge

800

# if there are a few related copies then we'll merge

799

# changes into all of them. This matches the behaviour

801

# changes into all of them. This matches the behaviour

800

# of upstream copytracing

802

# of upstream copytracing

801

copies[candidate] = f

803

copies[candidate] = f

802

804

803

return copies, {}, {}, {}, {}

805

return copies, {}, {}, {}, {}

804

806

805

807

806

def _related(f1, f2):

808

def _related(f1, f2):

807

"""return True if f1 and f2 filectx have a common ancestor

809

"""return True if f1 and f2 filectx have a common ancestor

808

810

809

Walk back to common ancestor to see if the two files originate

811

Walk back to common ancestor to see if the two files originate

810

from the same file. Since workingfilectx's rev() is None it messes

812

from the same file. Since workingfilectx's rev() is None it messes

811

up the integer comparison logic, hence the pre-step check for

813

up the integer comparison logic, hence the pre-step check for

812

None (f1 and f2 can only be workingfilectx's initially).

814

None (f1 and f2 can only be workingfilectx's initially).

813

"""

815

"""

814

816

815

if f1 == f2:

817

if f1 == f2:

816

return True # a match

818

return True # a match

817

819

818

g1, g2 = f1.ancestors(), f2.ancestors()

820

g1, g2 = f1.ancestors(), f2.ancestors()

819

try:

821

try:

820

f1r, f2r = f1.linkrev(), f2.linkrev()

822

f1r, f2r = f1.linkrev(), f2.linkrev()

821

823

822

if f1r is None:

824

if f1r is None:

823

f1 = next(g1)

825

f1 = next(g1)

824

if f2r is None:

826

if f2r is None:

825

f2 = next(g2)

827

f2 = next(g2)

826

828

827

while True:

829

while True:

828

f1r, f2r = f1.linkrev(), f2.linkrev()

830

f1r, f2r = f1.linkrev(), f2.linkrev()

829

if f1r > f2r:

831

if f1r > f2r:

830

f1 = next(g1)

832

f1 = next(g1)

831

elif f2r > f1r:

833

elif f2r > f1r:

832

f2 = next(g2)

834

f2 = next(g2)

833

else: # f1 and f2 point to files in the same linkrev

835

else: # f1 and f2 point to files in the same linkrev

834

return f1 == f2 # true if they point to the same file

836

return f1 == f2 # true if they point to the same file

835

except StopIteration:

837

except StopIteration:

836

return False

838

return False

837

839

838

840

839

def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):

841

def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):

840

"""reproduce copies from fromrev to rev in the dirstate

842

"""reproduce copies from fromrev to rev in the dirstate

841

843

842

If skiprev is specified, it's a revision that should be used to

844

If skiprev is specified, it's a revision that should be used to

843

filter copy records. Any copies that occur between fromrev and

845

filter copy records. Any copies that occur between fromrev and

844

skiprev will not be duplicated, even if they appear in the set of

846

skiprev will not be duplicated, even if they appear in the set of

845

copies between fromrev and rev.

847

copies between fromrev and rev.

846

"""

848

"""

847

exclude = {}

849

exclude = {}

848

ctraceconfig = repo.ui.config(b'experimental', b'copytrace')

850

ctraceconfig = repo.ui.config(b'experimental', b'copytrace')

849

bctrace = stringutil.parsebool(ctraceconfig)

851

bctrace = stringutil.parsebool(ctraceconfig)

850

if skiprev is not None and (

852

if skiprev is not None and (

851

ctraceconfig == b'heuristics' or bctrace or bctrace is None

853

ctraceconfig == b'heuristics' or bctrace or bctrace is None

852

):

854

):

853

# copytrace='off' skips this line, but not the entire function because

855

# copytrace='off' skips this line, but not the entire function because

854

# the line below is O(size of the repo) during a rebase, while the rest

856

# the line below is O(size of the repo) during a rebase, while the rest

855

# of the function is much faster (and is required for carrying copy

857

# of the function is much faster (and is required for carrying copy

856

# metadata across the rebase anyway).

858

# metadata across the rebase anyway).

857

exclude = pathcopies(repo[fromrev], repo[skiprev])

859

exclude = pathcopies(repo[fromrev], repo[skiprev])

858

for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):

860

for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):

859

if dst in exclude:

861

if dst in exclude:

860

continue

862

continue

861

if dst in wctx:

863

if dst in wctx:

862

wctx[dst].markcopied(src)

864

wctx[dst].markcopied(src)

863

865

864

866

865

def computechangesetfilesadded(ctx):

867

def computechangesetfilesadded(ctx):

866

"""return the list of files added in a changeset

868

"""return the list of files added in a changeset

867

"""

869

"""

868

added = []

870

added = []

869

for f in ctx.files():

871

for f in ctx.files():

870

if not any(f in p for p in ctx.parents()):

872

if not any(f in p for p in ctx.parents()):

871

added.append(f)

873

added.append(f)

872

return added

874

return added

873

875

874

876

875

def computechangesetfilesremoved(ctx):

877

def computechangesetfilesremoved(ctx):

876

"""return the list of files removed in a changeset

878

"""return the list of files removed in a changeset

877

"""

879

"""

878

removed = []

880

removed = []

879

for f in ctx.files():

881

for f in ctx.files():

880

if f not in ctx:

882

if f not in ctx:

881

removed.append(f)

883

removed.append(f)

882

return removed

884

return removed

883

885

884

886

885

def computechangesetcopies(ctx):

887

def computechangesetcopies(ctx):

886

"""return the copies data for a changeset

888

"""return the copies data for a changeset

887

889

888

The copies data are returned as a pair of dictionnary (p1copies, p2copies).

890

The copies data are returned as a pair of dictionnary (p1copies, p2copies).

889

891

890

Each dictionnary are in the form: `{newname: oldname}`

892

Each dictionnary are in the form: `{newname: oldname}`

891

"""

893

"""

892

p1copies = {}

894

p1copies = {}

893

p2copies = {}

895

p2copies = {}

894

p1 = ctx.p1()

896

p1 = ctx.p1()

895

p2 = ctx.p2()

897

p2 = ctx.p2()

896

narrowmatch = ctx._repo.narrowmatch()

898

narrowmatch = ctx._repo.narrowmatch()

897

for dst in ctx.files():

899

for dst in ctx.files():

898

if not narrowmatch(dst) or dst not in ctx:

900

if not narrowmatch(dst) or dst not in ctx:

899

continue

901

continue

900

copied = ctx[dst].renamed()

902

copied = ctx[dst].renamed()

901

if not copied:

903

if not copied:

902

continue

904

continue

903

src, srcnode = copied

905

src, srcnode = copied

904

if src in p1 and p1[src].filenode() == srcnode:

906

if src in p1 and p1[src].filenode() == srcnode:

905

p1copies[dst] = src

907

p1copies[dst] = src

906

elif src in p2 and p2[src].filenode() == srcnode:

908

elif src in p2 and p2[src].filenode() == srcnode:

907

p2copies[dst] = src

909

p2copies[dst] = src

908

return p1copies, p2copies

910

return p1copies, p2copies

909

911

910

912

911

def encodecopies(files, copies):

913

def encodecopies(files, copies):

912

items = []

914

items = []

913

for i, dst in enumerate(files):

915

for i, dst in enumerate(files):

914

if dst in copies:

916

if dst in copies:

915

items.append(b'%d\0%s' % (i, copies[dst]))

917

items.append(b'%d\0%s' % (i, copies[dst]))

916

if len(items) != len(copies):

918

if len(items) != len(copies):

917

raise error.ProgrammingError(

919

raise error.ProgrammingError(

918

b'some copy targets missing from file list'

920

b'some copy targets missing from file list'

919

)

921

)

920

return b"\n".join(items)

922

return b"\n".join(items)

921

923

922

924

923

def decodecopies(files, data):

925

def decodecopies(files, data):

924

try:

926

try:

925

copies = {}

927

copies = {}

926

if not data:

928

if not data:

927

return copies

929

return copies

928

for l in data.split(b'\n'):

930

for l in data.split(b'\n'):

929

strindex, src = l.split(b'\0')

931

strindex, src = l.split(b'\0')

930

i = int(strindex)

932

i = int(strindex)

931

dst = files[i]

933

dst = files[i]

932

copies[dst] = src

934

copies[dst] = src

933

return copies

935

return copies

934

except (ValueError, IndexError):

936

except (ValueError, IndexError):

935

# Perhaps someone had chosen the same key name (e.g. "p1copies") and

937

# Perhaps someone had chosen the same key name (e.g. "p1copies") and

936

# used different syntax for the value.

938

# used different syntax for the value.

937

return None

939

return None

938

940

939

941

940

def encodefileindices(files, subset):

942

def encodefileindices(files, subset):

941

subset = set(subset)

943

subset = set(subset)

942

indices = []

944

indices = []

943

for i, f in enumerate(files):

945

for i, f in enumerate(files):

944

if f in subset:

946

if f in subset:

945

indices.append(b'%d' % i)

947

indices.append(b'%d' % i)

946

return b'\n'.join(indices)

948

return b'\n'.join(indices)

947

949

948

950

949

def decodefileindices(files, data):

951

def decodefileindices(files, data):

950

try:

952

try:

951

subset = []

953

subset = []

952

if not data:

954

if not data:

953

return subset

955

return subset

954

for strindex in data.split(b'\n'):

956

for strindex in data.split(b'\n'):

955

i = int(strindex)

957

i = int(strindex)

956

if i < 0 or i >= len(files):

958

if i < 0 or i >= len(files):

957

return None

959

return None

958

subset.append(files[i])

960

subset.append(files[i])

959

return subset

961

return subset

960

except (ValueError, IndexError):

962

except (ValueError, IndexError):

961

# Perhaps someone had chosen the same key name (e.g. "added") and

963

# Perhaps someone had chosen the same key name (e.g. "added") and

962

# used different syntax for the value.

964

# used different syntax for the value.

963

return None

965

return None

964

966

965

967

966

def _getsidedata(srcrepo, rev):

968

def _getsidedata(srcrepo, rev):

967

ctx = srcrepo[rev]

969

ctx = srcrepo[rev]

968

filescopies = computechangesetcopies(ctx)

970

filescopies = computechangesetcopies(ctx)

969

filesadded = computechangesetfilesadded(ctx)

971

filesadded = computechangesetfilesadded(ctx)

970

filesremoved = computechangesetfilesremoved(ctx)

972

filesremoved = computechangesetfilesremoved(ctx)

971

sidedata = {}

973

sidedata = {}

972

if any([filescopies, filesadded, filesremoved]):

974

if any([filescopies, filesadded, filesremoved]):

973

sortedfiles = sorted(ctx.files())

975

sortedfiles = sorted(ctx.files())

974

p1copies, p2copies = filescopies

976

p1copies, p2copies = filescopies

975

p1copies = encodecopies(sortedfiles, p1copies)

977

p1copies = encodecopies(sortedfiles, p1copies)

976

p2copies = encodecopies(sortedfiles, p2copies)

978

p2copies = encodecopies(sortedfiles, p2copies)

977

filesadded = encodefileindices(sortedfiles, filesadded)

979

filesadded = encodefileindices(sortedfiles, filesadded)

978

filesremoved = encodefileindices(sortedfiles, filesremoved)

980

filesremoved = encodefileindices(sortedfiles, filesremoved)

979

sidedata[sidedatamod.SD_P1COPIES] = p1copies

981

sidedata[sidedatamod.SD_P1COPIES] = p1copies

980

sidedata[sidedatamod.SD_P2COPIES] = p2copies

982

sidedata[sidedatamod.SD_P2COPIES] = p2copies

981

sidedata[sidedatamod.SD_FILESADDED] = filesadded

983

sidedata[sidedatamod.SD_FILESADDED] = filesadded

982

sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved

984

sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved

983

return sidedata

985

return sidedata

984

986

985

987

986

def getsidedataadder(srcrepo, destrepo):

988

def getsidedataadder(srcrepo, destrepo):

987

def sidedatacompanion(revlog, rev):

989

def sidedatacompanion(revlog, rev):

988

sidedata = {}

990

sidedata = {}

989

if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog

991

if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog

990

sidedata = _getsidedata(srcrepo, rev)

992

sidedata = _getsidedata(srcrepo, rev)

991

return False, (), sidedata

993

return False, (), sidedata

992

994

993

return sidedatacompanion

995

return sidedatacompanion

994

996

995

997

996

def getsidedataremover(srcrepo, destrepo):

998

def getsidedataremover(srcrepo, destrepo):

997

def sidedatacompanion(revlog, rev):

999

def sidedatacompanion(revlog, rev):

998

f = ()

1000

f = ()

999

if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog

1001

if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog

1000

if revlog.flags(rev) & REVIDX_SIDEDATA:

1002

if revlog.flags(rev) & REVIDX_SIDEDATA:

1001

f = (

1003

f = (

1002

sidedatamod.SD_P1COPIES,

1004

sidedatamod.SD_P1COPIES,

1003

sidedatamod.SD_P2COPIES,

1005

sidedatamod.SD_P2COPIES,

1004

sidedatamod.SD_FILESADDED,

1006

sidedatamod.SD_FILESADDED,

1005

sidedatamod.SD_FILESREMOVED,

1007

sidedatamod.SD_FILESREMOVED,

1006

)

1008

)

1007

return False, f, {}

1009

return False, f, {}

1008

1010

1009

return sidedatacompanion

1011

return sidedatacompanion

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import collections
             import heapq
             import os
             from .i18n import _
             from .revlogutils.flagutil import REVIDX_SIDEDATA
             from . import (
                 error,
                 match as matchmod,
                 node,
                 pathutil,
                 pycompat,
                 util,
             )
             from .revlogutils import sidedata as sidedatamod
             from .utils import stringutil
             def _findlimit(repo, ctxa, ctxb):
                 """
                 Find the last revision that needs to be checked to ensure that a full
                 transitive closure for file copies can be properly calculated.
                 Generally, this means finding the earliest revision number that's an
                 ancestor of a or b but not both, except when a or b is a direct descendent
                 of the other, in which case we can return the minimum revnum of a and b.
                 """
                 # basic idea:
                 # - mark a and b with different sides
                 # - if a parent's children are all on the same side, the parent is
                 #   on that side, otherwise it is on no side
                 # - walk the graph in topological order with the help of a heap;
                 #   - add unseen parents to side map
                 #   - clear side of any parent that has children on different sides
                 #   - track number of interesting revs that might still be on a side
                 #   - track the lowest interesting rev seen
                 #   - quit when interesting revs is zero
                 cl = repo.changelog
                 wdirparents = None
                 a = ctxa.rev()
                 b = ctxb.rev()
                 if a is None:
                     wdirparents = (ctxa.p1(), ctxa.p2())
                     a = node.wdirrev
                 if b is None:
                     assert not wdirparents
                     wdirparents = (ctxb.p1(), ctxb.p2())
                     b = node.wdirrev
                 side = {a: -1, b: 1}
                 visit = [-a, -b]
                 heapq.heapify(visit)
                 interesting = len(visit)
                 limit = node.wdirrev
                 while interesting:
                     r = -(heapq.heappop(visit))
                     if r == node.wdirrev:
                         parents = [pctx.rev() for pctx in wdirparents]
                     else:
                         parents = cl.parentrevs(r)
                     if parents[1] == node.nullrev:
                         parents = parents[:1]
                     for p in parents:
                         if p not in side:
                             # first time we see p; add it to visit
                             side[p] = side[r]
                             if side[p]:
                                 interesting += 1
                             heapq.heappush(visit, -p)
                         elif side[p] and side[p] != side[r]:
                             # p was interesting but now we know better
                             side[p] = 0
                             interesting -= 1
                     if side[r]:
                         limit = r  # lowest rev visited
                         interesting -= 1
                 # Consider the following flow (see test-commit-amend.t under issue4405):
                 # 1/ File 'a0' committed
                 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
                 # 3/ Move back to first commit
                 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
                 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
                 #
                 # During the amend in step five, we will be in this state:
                 #
                 # @  3 temporary amend commit for a1-amend
                 # |
                 # o  2 a1-amend
                 # |
                 # | o  1 a1
                 # |/
                 # o  0 a0
                 #
                 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
                 # yet the filelog has the copy information in rev 1 and we will not look
                 # back far enough unless we also look at the a and b as candidates.
                 # This only occurs when a is a descendent of b or visa-versa.
                 return min(limit, a, b)
             def _filter(src, dst, t):
                 """filters out invalid copies after chaining"""
                 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
                 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
                 # in the following table (not including trivial cases). For example, case 2
                 # is where a file existed in 'src' and remained under that name in 'mid' and
                 # then was renamed between 'mid' and 'dst'.
                 #
                 # case src mid dst result
                 #   1   x   y   -    -
                 #   2   x   y   y   x->y
                 #   3   x   y   x    -
                 #   4   x   y   z   x->z
                 #   5   -   x   y    -
                 #   6   x   x   y   x->y
                 #
                 # _chain() takes care of chaining the copies in 'a' and 'b', but it
                 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
                 # between 5 and 6, so it includes all cases in its result.
                 # Cases 1, 3, and 5 are then removed by _filter().
                 for k, v in list(t.items()):
                     # remove copies from files that didn't exist
                     if v not in src:
                         del t[k]
                     # remove criss-crossed copies
                     elif k in src and v in dst:
                         del t[k]
                     # remove copies to files that were then removed
                     elif k not in dst:
                         del t[k]
             def _chain(a, b):
                 """chain two sets of copies 'a' and 'b'"""
                 t = a.copy()
                 for k, v in pycompat.iteritems(b):
                     if v in t:
                         t[k] = t[v]
                     else:
                         t[k] = v
                 return t
             def _tracefile(fctx, am, basemf, limit):
                 """return file context that is the ancestor of fctx present in ancestor
-                manifest am, stopping after the first ancestor lower than limit"""
+                manifest am
+                Note: we used to try and stop after a given limit, however checking if that
+                limit is reached turned out to be very expensive. we are better off
+                disabling that feature."""
                 for f in fctx.ancestors():
                     path = f.path()
                     if am.get(path, None) == f.filenode():
                         return path
                     if basemf and basemf.get(path, None) == f.filenode():
                         return path
-                    if not f.isintroducedafter(limit):
-                        return None
             def _dirstatecopies(repo, match=None):
                 ds = repo.dirstate
                 c = ds.copies().copy()
                 for k in list(c):
                     if ds[k] not in b'anm' or (match and not match(k)):
                         del c[k]
                 return c
             def _computeforwardmissing(a, b, match=None):
                 """Computes which files are in b but not a.
                 This is its own function so extensions can easily wrap this call to see what
                 files _forwardcopies is about to process.
                 """
                 ma = a.manifest()
                 mb = b.manifest()
                 return mb.filesnotin(ma, match=match)
             def usechangesetcentricalgo(repo):
                 """Checks if we should use changeset-centric copy algorithms"""
                 if repo.filecopiesmode == b'changeset-sidedata':
                     return True
                 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
                 changesetsource = (b'changeset-only', b'compatibility')
                 return readfrom in changesetsource
             def _committedforwardcopies(a, b, base, match):
                 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 repo = a._repo
                 if usechangesetcentricalgo(repo):
                     return _changesetforwardcopies(a, b, match)
                 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
                 dbg = repo.ui.debug
                 if debug:
                     dbg(b'debug.copies:    looking into rename from %s to %s\n' % (a, b))
                 limit = _findlimit(repo, a, b)
                 if debug:
                     dbg(b'debug.copies:      search limit: %d\n' % limit)
                 am = a.manifest()
                 basemf = None if base is None else base.manifest()
                 # find where new files came from
                 # we currently don't try to find where old files went, too expensive
                 # this means we can miss a case like 'hg rm b; hg cp a b'
                 cm = {}
                 # Computing the forward missing is quite expensive on large manifests, since
                 # it compares the entire manifests. We can optimize it in the common use
                 # case of computing what copies are in a commit versus its parent (like
                 # during a rebase or histedit). Note, we exclude merge commits from this
                 # optimization, since the ctx.files() for a merge commit is not correct for
                 # this comparison.
                 forwardmissingmatch = match
                 if b.p1() == a and b.p2().node() == node.nullid:
                     filesmatcher = matchmod.exact(b.files())
                     forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
                 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
                 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
                 if debug:
                     dbg(b'debug.copies:      missing files to search: %d\n' % len(missing))
                 for f in sorted(missing):
                     if debug:
                         dbg(b'debug.copies:        tracing file: %s\n' % f)
                     fctx = b[f]
                     fctx._ancestrycontext = ancestrycontext
                     if debug:
                         start = util.timer()
                     opath = _tracefile(fctx, am, basemf, limit)
                     if opath:
                         if debug:
                             dbg(b'debug.copies:          rename of: %s\n' % opath)
                         cm[f] = opath
                     if debug:
                         dbg(
                             b'debug.copies:          time: %f seconds\n'
                             % (util.timer() - start)
                         )
                 return cm
             def _changesetforwardcopies(a, b, match):
                 if a.rev() in (node.nullrev, b.rev()):
                     return {}
                 repo = a.repo()
                 children = {}
                 cl = repo.changelog
                 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
                 for r in missingrevs:
                     for p in cl.parentrevs(r):
                         if p == node.nullrev:
                             continue
                         if p not in children:
                             children[p] = [r]
                         else:
                             children[p].append(r)
                 roots = set(children) - set(missingrevs)
                 # 'work' contains 3-tuples of a (revision number, parent number, copies).
                 # The parent number is only used for knowing which parent the copies dict
                 # came from.
                 # NOTE: To reduce costly copying the 'copies' dicts, we reuse the same
                 # instance for *one* of the child nodes (the last one). Once an instance
                 # has been put on the queue, it is thus no longer safe to modify it.
                 # Conversely, it *is* safe to modify an instance popped off the queue.
                 work = [(r, 1, {}) for r in roots]
                 heapq.heapify(work)
                 alwaysmatch = match.always()
                 while work:
                     r, i1, copies = heapq.heappop(work)
                     if work and work[0][0] == r:
                         # We are tracing copies from both parents
                         r, i2, copies2 = heapq.heappop(work)
                         for dst, src in copies2.items():
                             # Unlike when copies are stored in the filelog, we consider
                             # it a copy even if the destination already existed on the
                             # other branch. It's simply too expensive to check if the
                             # file existed in the manifest.
                             if dst not in copies:
                                 # If it was copied on the p1 side, leave it as copied from
                                 # that side, even if it was also copied on the p2 side.
                                 copies[dst] = copies2[dst]
                     if r == b.rev():
                         return copies
                     for i, c in enumerate(children[r]):
                         childctx = repo[c]
                         if r == childctx.p1().rev():
                             parent = 1
                             childcopies = childctx.p1copies()
                         else:
                             assert r == childctx.p2().rev()
                             parent = 2
                             childcopies = childctx.p2copies()
                         if not alwaysmatch:
                             childcopies = {
                                 dst: src for dst, src in childcopies.items() if match(dst)
                             }
                         # Copy the dict only if later iterations will also need it
                         if i != len(children[r]) - 1:
                             newcopies = copies.copy()
                         else:
                             newcopies = copies
                         if childcopies:
                             newcopies = _chain(newcopies, childcopies)
                         for f in childctx.filesremoved():
                             if f in newcopies:
                                 del newcopies[f]
                         heapq.heappush(work, (c, parent, newcopies))
                 assert False
             def _forwardcopies(a, b, base=None, match=None):
                 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
                 if base is None:
                     base = a
                 match = a.repo().narrowmatch(match)
                 # check for working copy
                 if b.rev() is None:
                     cm = _committedforwardcopies(a, b.p1(), base, match)
                     # combine copies from dirstate if necessary
                     copies = _chain(cm, _dirstatecopies(b._repo, match))
                 else:
                     copies = _committedforwardcopies(a, b, base, match)
                 return copies
             def _backwardrenames(a, b, match):
                 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
                     return {}
                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
                 # We don't want to pass in "match" here, since that would filter
                 # the destination by it. Since we're reversing the copies, we want
                 # to filter the source instead.
                 f = _forwardcopies(b, a)
                 r = {}
                 for k, v in sorted(pycompat.iteritems(f)):
                     if match and not match(v):
                         continue
                     # remove copies
                     if v in a:
                         continue
                     r[v] = k
                 return r
             def pathcopies(x, y, match=None):
                 """find {dst@y: src@x} copy mapping for directed compare"""
                 repo = x._repo
                 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
                 if debug:
                     repo.ui.debug(
                         b'debug.copies: searching copies from %s to %s\n' % (x, y)
                     )
                 if x == y or not x or not y:
                     return {}
                 a = y.ancestor(x)
                 if a == x:
                     if debug:
                         repo.ui.debug(b'debug.copies: search mode: forward\n')
                     if y.rev() is None and x == y.p1():
                         # short-circuit to avoid issues with merge states
                         return _dirstatecopies(repo, match)
                     copies = _forwardcopies(x, y, match=match)
                 elif a == y:
                     if debug:
                         repo.ui.debug(b'debug.copies: search mode: backward\n')
                     copies = _backwardrenames(x, y, match=match)
                 else:
                     if debug:
                         repo.ui.debug(b'debug.copies: search mode: combined\n')
                     base = None
                     if a.rev() != node.nullrev:
                         base = x
                     copies = _chain(
                         _backwardrenames(x, a, match=match),
                         _forwardcopies(a, y, base, match=match),
                     )
                 _filter(x, y, copies)
                 return copies
             def mergecopies(repo, c1, c2, base):
                 """
                 Finds moves and copies between context c1 and c2 that are relevant for
                 merging. 'base' will be used as the merge base.
                 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
                 files that were moved/ copied in one merge parent and modified in another.
                 For example:
                 o          ---> 4 another commit
                 |
                 |   o      ---> 3 commit that modifies a.txt
                 |  /
                 o /        ---> 2 commit that moves a.txt to b.txt
                 |/
                 o          ---> 1 merge base
                 If we try to rebase revision 3 on revision 4, since there is no a.txt in
                 revision 4, and if user have copytrace disabled, we prints the following
                 message:
                 ```other changed <file> which local deleted```
                 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
                 "dirmove".
                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.
                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.
                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.
                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.
                 "dirmove" is a mapping of detected source dir -> destination dir renames.
                 This is needed for handling changes to new files previously grafted into
                 renamed directories.
                 This function calls different copytracing algorithms based on config.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return {}, {}, {}, {}, {}
                 narrowmatch = c1.repo().narrowmatch()
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
                 copytracing = repo.ui.config(b'experimental', b'copytrace')
                 if stringutil.parsebool(copytracing) is False:
                     # stringutil.parsebool() returns None when it is unable to parse the
                     # value, so we should rely on making sure copytracing is on such cases
                     return {}, {}, {}, {}, {}
                 if usechangesetcentricalgo(repo):
                     # The heuristics don't make sense when we need changeset-centric algos
                     return _fullcopytracing(repo, c1, c2, base)
                 # Copy trace disabling is explicitly below the node == p1 logic above
                 # because the logic above is required for a simple copy to be kept across a
                 # rebase.
                 if copytracing == b'heuristics':
                     # Do full copytracing if only non-public revisions are involved as
                     # that will be fast enough and will also cover the copies which could
                     # be missed by heuristics
                     if _isfullcopytraceable(repo, c1, base):
                         return _fullcopytracing(repo, c1, c2, base)
                     return _heuristicscopytracing(repo, c1, c2, base)
                 else:
                     return _fullcopytracing(repo, c1, c2, base)
             def _isfullcopytraceable(repo, c1, base):
                 """ Checks that if base, source and destination are all no-public branches,
                 if yes let's use the full copytrace algorithm for increased capabilities
                 since it will be fast enough.
                 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
                 number of changesets from c1 to base such that if number of changesets are
                 more than the limit, full copytracing algorithm won't be used.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c1.mutable() and base.mutable():
                     sourcecommitlimit = repo.ui.configint(
                         b'experimental', b'copytrace.sourcecommitlimit'
                     )
                     commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
                     return commits < sourcecommitlimit
                 return False
             def _checksinglesidecopies(
                 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
             ):
                 if src not in m2:
                     # deleted on side 2
                     if src not in m1:
                         # renamed on side 1, deleted on side 2
                         renamedelete[src] = dsts1
                 elif m2[src] != mb[src]:
                     if not _related(c2[src], base[src]):
                         return
                     # modified on side 2
                     for dst in dsts1:
                         if dst not in m2:
                             # dst not added on side 2 (handle as regular
                             # "both created" case in manifestmerge otherwise)
                             copy[dst] = src
             def _fullcopytracing(repo, c1, c2, base):
                 """ The full copytracing algorithm which finds all the new files that were
                 added from merge base up to the top commit and for each file it checks if
                 this file was copied from another file.
                 This is pretty slow when a lot of changesets are involved but will track all
                 the copies.
                 """
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 mb = base.manifest()
                 copies1 = pathcopies(base, c1)
                 copies2 = pathcopies(base, c2)
                 inversecopies1 = {}
                 inversecopies2 = {}
                 for dst, src in copies1.items():
                     inversecopies1.setdefault(src, []).append(dst)
                 for dst, src in copies2.items():
                     inversecopies2.setdefault(src, []).append(dst)
                 copy = {}
                 diverge = {}
                 renamedelete = {}
                 allsources = set(inversecopies1) | set(inversecopies2)
                 for src in allsources:
                     dsts1 = inversecopies1.get(src)
                     dsts2 = inversecopies2.get(src)
                     if dsts1 and dsts2:
                         # copied/renamed on both sides
                         if src not in m1 and src not in m2:
                             # renamed on both sides
                             dsts1 = set(dsts1)
                             dsts2 = set(dsts2)
                             # If there's some overlap in the rename destinations, we
                             # consider it not divergent. For example, if side 1 copies 'a'
                             # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                             # and 'd' and deletes 'a'.
                             if dsts1 & dsts2:
                                 for dst in dsts1 & dsts2:
                                     copy[dst] = src
                             else:
                                 diverge[src] = sorted(dsts1 | dsts2)
                         elif src in m1 and src in m2:
                             # copied on both sides
                             dsts1 = set(dsts1)
                             dsts2 = set(dsts2)
                             for dst in dsts1 & dsts2:
                                 copy[dst] = src
                         # TODO: Handle cases where it was renamed on one side and copied
                         # on the other side
                     elif dsts1:
                         # copied/renamed only on side 1
                         _checksinglesidecopies(
                             src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
                         )
                     elif dsts2:
                         # copied/renamed only on side 2
                         _checksinglesidecopies(
                             src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
                         )
                 renamedeleteset = set()
                 divergeset = set()
                 for dsts in diverge.values():
                     divergeset.update(dsts)
                 for dsts in renamedelete.values():
                     renamedeleteset.update(dsts)
                 # find interesting file sets from manifests
                 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
                 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
                 u1 = sorted(addedinm1 - addedinm2)
                 u2 = sorted(addedinm2 - addedinm1)
                 header = b"  unmatched files in %s"
                 if u1:
                     repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
                 if u2:
                     repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))
                 fullcopy = copies1.copy()
                 fullcopy.update(copies2)
                 if not fullcopy:
                     return copy, {}, diverge, renamedelete, {}
                 if repo.ui.debugflag:
                     repo.ui.debug(
                         b"  all copies found (* = to merge, ! = divergent, "
                         b"% = renamed and deleted):\n"
                     )
                     for f in sorted(fullcopy):
                         note = b""
                         if f in copy:
                             note += b"*"
                         if f in divergeset:
                             note += b"!"
                         if f in renamedeleteset:
                             note += b"%"
                         repo.ui.debug(
                             b"   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
                         )
                 del divergeset
                 repo.ui.debug(b"  checking for directory renames\n")
                 # generate a directory move map
                 d1, d2 = c1.dirs(), c2.dirs()
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in pycompat.iteritems(fullcopy):
                     dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif dsrc in d1 and ddst in d1:
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc)
                     elif dsrc in d2 and ddst in d2:
                         # directory wasn't entirely moved remotely
                         invalid.add(dsrc)
                     elif dsrc in dirmove and dirmove[dsrc] != ddst:
                         # files from the same directory moved to two different places
                         invalid.add(dsrc)
                     else:
                         # looks good so far
                         dirmove[dsrc] = ddst
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del d1, d2, invalid
                 if not dirmove:
                     return copy, {}, diverge, renamedelete, {}
                 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
                 for d in dirmove:
                     repo.ui.debug(
                         b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
                     )
                 movewithdir = {}
                 # check unaccounted nonoverlapping files against directory moves
                 for f in u1 + u2:
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d) :]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(
                                         b"   pending file src: '%s' -> dst: '%s'\n"
                                         % (f, df)
                                     )
                                 break
                 return copy, movewithdir, diverge, renamedelete, dirmove
             def _heuristicscopytracing(repo, c1, c2, base):
                 """ Fast copytracing using filename heuristics
                 Assumes that moves or renames are of following two types:
 ) Inside a directory only (same directory name but different filenames)
 ) Move from one directory to another
                                 (same filenames but different directory names)
                 Works only when there are no merge commits in the "source branch".
                 Source branch is commits from base up to c2 not including base.
                 If merge is involved it fallbacks to _fullcopytracing().
                 Can be used by setting the following config:
                     [experimental]
                     copytrace = heuristics
                 In some cases the copy/move candidates found by heuristics can be very large
                 in number and that will make the algorithm slow. The number of possible
                 candidates to check can be limited by using the config
                 `experimental.copytrace.movecandidateslimit` which defaults to 100.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c2.rev() is None:
                     c2 = c2.p1()
                 copies = {}
                 changedfiles = set()
                 m1 = c1.manifest()
                 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
                     # If base is not in c2 branch, we switch to fullcopytracing
                     repo.ui.debug(
                         b"switching to full copytracing as base is not "
                         b"an ancestor of c2\n"
                     )
                     return _fullcopytracing(repo, c1, c2, base)
                 ctx = c2
                 while ctx != base:
                     if len(ctx.parents()) == 2:
                         # To keep things simple let's not handle merges
                         repo.ui.debug(b"switching to full copytracing because of merges\n")
                         return _fullcopytracing(repo, c1, c2, base)
                     changedfiles.update(ctx.files())
                     ctx = ctx.p1()
                 cp = _forwardcopies(base, c2)
                 for dst, src in pycompat.iteritems(cp):
                     if src in m1:
                         copies[dst] = src
                 # file is missing if it isn't present in the destination, but is present in
                 # the base and present in the source.
                 # Presence in the base is important to exclude added files, presence in the
                 # source is important to exclude removed files.
                 filt = lambda f: f not in m1 and f in base and f in c2
                 missingfiles = [f for f in changedfiles if filt(f)]
                 if missingfiles:
                     basenametofilename = collections.defaultdict(list)
                     dirnametofilename = collections.defaultdict(list)
                     for f in m1.filesnotin(base.manifest()):
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         basenametofilename[basename].append(f)
                         dirnametofilename[dirname].append(f)
                     for f in missingfiles:
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         samebasename = basenametofilename[basename]
                         samedirname = dirnametofilename[dirname]
                         movecandidates = samebasename + samedirname
                         # f is guaranteed to be present in c2, that's why
                         # c2.filectx(f) won't fail
                         f2 = c2.filectx(f)
                         # we can have a lot of candidates which can slow down the heuristics
                         # config value to limit the number of candidates moves to check
                         maxcandidates = repo.ui.configint(
                             b'experimental', b'copytrace.movecandidateslimit'
                         )
                         if len(movecandidates) > maxcandidates:
                             repo.ui.status(
                                 _(
                                     b"skipping copytracing for '%s', more "
                                     b"candidates than the limit: %d\n"
                                 )
                                 % (f, len(movecandidates))
                             )
                             continue
                         for candidate in movecandidates:
                             f1 = c1.filectx(candidate)
                             if _related(f1, f2):
                                 # if there are a few related copies then we'll merge
                                 # changes into all of them. This matches the behaviour
                                 # of upstream copytracing
                                 copies[candidate] = f
                 return copies, {}, {}, {}, {}
             def _related(f1, f2):
                 """return True if f1 and f2 filectx have a common ancestor
                 Walk back to common ancestor to see if the two files originate
                 from the same file. Since workingfilectx's rev() is None it messes
                 up the integer comparison logic, hence the pre-step check for
                 None (f1 and f2 can only be workingfilectx's initially).
                 """
                 if f1 == f2:
                     return True  # a match
                 g1, g2 = f1.ancestors(), f2.ancestors()
                 try:
                     f1r, f2r = f1.linkrev(), f2.linkrev()
                     if f1r is None:
                         f1 = next(g1)
                     if f2r is None:
                         f2 = next(g2)
                     while True:
                         f1r, f2r = f1.linkrev(), f2.linkrev()
                         if f1r > f2r:
                             f1 = next(g1)
                         elif f2r > f1r:
                             f2 = next(g2)
                         else:  # f1 and f2 point to files in the same linkrev
                             return f1 == f2  # true if they point to the same file
                 except StopIteration:
                     return False
             def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
                 """reproduce copies from fromrev to rev in the dirstate
                 If skiprev is specified, it's a revision that should be used to
                 filter copy records. Any copies that occur between fromrev and
                 skiprev will not be duplicated, even if they appear in the set of
                 copies between fromrev and rev.
                 """
                 exclude = {}
                 ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
                 bctrace = stringutil.parsebool(ctraceconfig)
                 if skiprev is not None and (
                     ctraceconfig == b'heuristics' or bctrace or bctrace is None
                 ):
                     # copytrace='off' skips this line, but not the entire function because
                     # the line below is O(size of the repo) during a rebase, while the rest
                     # of the function is much faster (and is required for carrying copy
                     # metadata across the rebase anyway).
                     exclude = pathcopies(repo[fromrev], repo[skiprev])
                 for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
                     if dst in exclude:
                         continue
                     if dst in wctx:
                         wctx[dst].markcopied(src)
             def computechangesetfilesadded(ctx):
                 """return the list of files added in a changeset
                 """
                 added = []
                 for f in ctx.files():
                     if not any(f in p for p in ctx.parents()):
                         added.append(f)
                 return added
             def computechangesetfilesremoved(ctx):
                 """return the list of files removed in a changeset
                 """
                 removed = []
                 for f in ctx.files():
                     if f not in ctx:
                         removed.append(f)
                 return removed
             def computechangesetcopies(ctx):
                 """return the copies data for a changeset
                 The copies data are returned as a pair of dictionnary (p1copies, p2copies).
                 Each dictionnary are in the form: `{newname: oldname}`
                 """
                 p1copies = {}
                 p2copies = {}
                 p1 = ctx.p1()
                 p2 = ctx.p2()
                 narrowmatch = ctx._repo.narrowmatch()
                 for dst in ctx.files():
                     if not narrowmatch(dst) or dst not in ctx:
                         continue
                     copied = ctx[dst].renamed()
                     if not copied:
                         continue
                     src, srcnode = copied
                     if src in p1 and p1[src].filenode() == srcnode:
                         p1copies[dst] = src
                     elif src in p2 and p2[src].filenode() == srcnode:
                         p2copies[dst] = src
                 return p1copies, p2copies
             def encodecopies(files, copies):
                 items = []
                 for i, dst in enumerate(files):
                     if dst in copies:
                         items.append(b'%d\0%s' % (i, copies[dst]))
                 if len(items) != len(copies):
                     raise error.ProgrammingError(
                         b'some copy targets missing from file list'
                     )
                 return b"\n".join(items)
             def decodecopies(files, data):
                 try:
                     copies = {}
                     if not data:
                         return copies
                     for l in data.split(b'\n'):
                         strindex, src = l.split(b'\0')
                         i = int(strindex)
                         dst = files[i]
                         copies[dst] = src
                     return copies
                 except (ValueError, IndexError):
                     # Perhaps someone had chosen the same key name (e.g. "p1copies") and
                     # used different syntax for the value.
                     return None
             def encodefileindices(files, subset):
                 subset = set(subset)
                 indices = []
                 for i, f in enumerate(files):
                     if f in subset:
                         indices.append(b'%d' % i)
                 return b'\n'.join(indices)
             def decodefileindices(files, data):
                 try:
                     subset = []
                     if not data:
                         return subset
                     for strindex in data.split(b'\n'):
                         i = int(strindex)
                         if i < 0 or i >= len(files):
                             return None
                         subset.append(files[i])
                     return subset
                 except (ValueError, IndexError):
                     # Perhaps someone had chosen the same key name (e.g. "added") and
                     # used different syntax for the value.
                     return None
             def _getsidedata(srcrepo, rev):
                 ctx = srcrepo[rev]
                 filescopies = computechangesetcopies(ctx)
                 filesadded = computechangesetfilesadded(ctx)
                 filesremoved = computechangesetfilesremoved(ctx)
                 sidedata = {}
                 if any([filescopies, filesadded, filesremoved]):
                     sortedfiles = sorted(ctx.files())
                     p1copies, p2copies = filescopies
                     p1copies = encodecopies(sortedfiles, p1copies)
                     p2copies = encodecopies(sortedfiles, p2copies)
                     filesadded = encodefileindices(sortedfiles, filesadded)
                     filesremoved = encodefileindices(sortedfiles, filesremoved)
                     sidedata[sidedatamod.SD_P1COPIES] = p1copies
                     sidedata[sidedatamod.SD_P2COPIES] = p2copies
                     sidedata[sidedatamod.SD_FILESADDED] = filesadded
                     sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
                 return sidedata
             def getsidedataadder(srcrepo, destrepo):
                 def sidedatacompanion(revlog, rev):
                     sidedata = {}
                     if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
                         sidedata = _getsidedata(srcrepo, rev)
                     return False, (), sidedata
                 return sidedatacompanion
             def getsidedataremover(srcrepo, destrepo):
                 def sidedatacompanion(revlog, rev):
                     f = ()
                     if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
                         if revlog.flags(rev) & REVIDX_SIDEDATA:
                             f = (
                                 sidedatamod.SD_P1COPIES,
                                 sidedatamod.SD_P2COPIES,
                                 sidedatamod.SD_FILESADDED,
                                 sidedatamod.SD_FILESREMOVED,
                             )
                     return False, f, {}
                 return sidedatacompanion