##// END OF EJS Templates
py3: use '%f' for floats instead of '%s'...
Pulkit Goyal -
r40112:2e9378f6 default
parent child Browse files
Show More
@@ -1,921 +1,921 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 scmutil,
20 scmutil,
21 util,
21 util,
22 )
22 )
23 from .utils import (
23 from .utils import (
24 stringutil,
24 stringutil,
25 )
25 )
26
26
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.
    None if no such revision exists.

    'a' and 'b' are revision numbers; None stands for the working directory.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    working = len(cl) # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    side = {a: -1, b: 1}
    # revisions are negated so the min-heap pops the *highest* rev first,
    # giving us a topological (newest-to-oldest) walk
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            # pseudo rev: parents come from the dirstate, not the changelog
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(limit, a, b)
111
111
112 def _chain(src, dst, a, b):
112 def _chain(src, dst, a, b):
113 """chain two sets of copies a->b"""
113 """chain two sets of copies a->b"""
114 t = a.copy()
114 t = a.copy()
115 for k, v in b.iteritems():
115 for k, v in b.iteritems():
116 if v in t:
116 if v in t:
117 # found a chain
117 # found a chain
118 if t[v] != k:
118 if t[v] != k:
119 # file wasn't renamed back to itself
119 # file wasn't renamed back to itself
120 t[k] = t[v]
120 t[k] = t[v]
121 if v not in dst:
121 if v not in dst:
122 # chain was a rename, not a copy
122 # chain was a rename, not a copy
123 del t[v]
123 del t[v]
124 if v in src:
124 if v in src:
125 # file is a copy of an existing file
125 # file is a copy of an existing file
126 t[k] = v
126 t[k] = v
127
127
128 # remove criss-crossed copies
128 # remove criss-crossed copies
129 for k, v in list(t.items()):
129 for k, v in list(t.items()):
130 if k in src and v in dst:
130 if k in src and v in dst:
131 del t[k]
131 del t[k]
132
132
133 return t
133 return t
134
134
135 def _tracefile(fctx, am, limit=-1):
135 def _tracefile(fctx, am, limit=-1):
136 """return file context that is the ancestor of fctx present in ancestor
136 """return file context that is the ancestor of fctx present in ancestor
137 manifest am, stopping after the first ancestor lower than limit"""
137 manifest am, stopping after the first ancestor lower than limit"""
138
138
139 for f in fctx.ancestors():
139 for f in fctx.ancestors():
140 if am.get(f.path(), None) == f.filenode():
140 if am.get(f.path(), None) == f.filenode():
141 return f
141 return f
142 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
142 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
143 return None
143 return None
144
144
145 def _dirstatecopies(d, match=None):
145 def _dirstatecopies(d, match=None):
146 ds = d._repo.dirstate
146 ds = d._repo.dirstate
147 c = ds.copies().copy()
147 c = ds.copies().copy()
148 for k in list(c):
148 for k in list(c):
149 if ds[k] not in 'anm' or (match and not match(k)):
149 if ds[k] not in 'anm' or (match and not match(k)):
150 del c[k]
150 del c[k]
151 return c
151 return c
152
152
153 def _computeforwardmissing(a, b, match=None):
153 def _computeforwardmissing(a, b, match=None):
154 """Computes which files are in b but not a.
154 """Computes which files are in b but not a.
155 This is its own function so extensions can easily wrap this call to see what
155 This is its own function so extensions can easily wrap this call to see what
156 files _forwardcopies is about to process.
156 files _forwardcopies is about to process.
157 """
157 """
158 ma = a.manifest()
158 ma = a.manifest()
159 mb = b.manifest()
159 mb = b.manifest()
160 return mb.filesnotin(ma, match=match)
160 return mb.filesnotin(ma, match=match)
161
161
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: looking into rename from %s to %s\n'
            % (a, b))
    limit = _findlimit(repo, a.rev(), b.rev())
    if limit is None:
        # no common ancestor: disable the limit so _tracefile walks freely
        limit = -1
    if debug:
        dbg('debug.copies: search limit: %d\n' % limit)
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # shared ancestry context keeps per-file linkrev adjustment cheap
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg('debug.copies: missing file to search: %d\n' % len(missing))

    for f in missing:
        if debug:
            dbg('debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            if debug:
                dbg('debug.copies: rename of: %s\n' % ofctx._path)
            cm[f] = ofctx.path()
        if debug:
            # util.timer() yields a float, so use '%f' (not '%s') -- the
            # latter breaks under Python 3's bytes formatting
            dbg('debug.copies: time: %f seconds\n'
                % (util.timer() - start))
    return cm
218
218
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
    if b.rev() is not None:
        # both endpoints are committed changesets
        return _committedforwardcopies(a, b, match)
    # b is the working copy
    if a == b.p1():
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(b, match)
    # copies up to b's parent, chained with those recorded in the dirstate
    committed = _committedforwardcopies(a, b.p1(), match)
    return _chain(a, b, committed, _dirstatecopies(b, match))
232
232
def _backwardrenames(a, b):
    """find renames from b back to a, as a {path@b: path@a} mapping

    Copies are dropped: a file whose source still exists in 'a' was
    copied, not renamed, so it is skipped.
    """
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    f = _forwardcopies(b, a)
    r = {}
    # dict.items() instead of the py2-only iteritems() so this also runs
    # on Python 3; sorted() keeps the arbitrary 1:n pick deterministic
    for k, v in sorted(f.items()):
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
248
248
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: searching copies from %s to %s\n'
            % (x, y))
    if x == y or not x or not y:
        # identical or empty endpoints: nothing can have been copied
        return {}
    base = y.ancestor(x)
    if base == x:
        # x is an ancestor of y: walk copies forward
        if debug:
            dbg('debug.copies: search mode: forward\n')
        return _forwardcopies(x, y, match=match)
    if base == y:
        # y is an ancestor of x: walk renames backward
        if debug:
            dbg('debug.copies: search mode: backward\n')
        return _backwardrenames(x, y)
    # unrelated endpoints: back to the common ancestor, then forward
    if debug:
        dbg('debug.copies: search mode: combined\n')
    return _chain(x, y, _backwardrenames(x, base),
                  _forwardcopies(base, y, match=match))
271
271
272 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
272 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
273 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
273 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
274 and c2. This is its own function so extensions can easily wrap this call
274 and c2. This is its own function so extensions can easily wrap this call
275 to see what files mergecopies is about to process.
275 to see what files mergecopies is about to process.
276
276
277 Even though c1 and c2 are not used in this function, they are useful in
277 Even though c1 and c2 are not used in this function, they are useful in
278 other extensions for being able to read the file nodes of the changed files.
278 other extensions for being able to read the file nodes of the changed files.
279
279
280 "baselabel" can be passed to help distinguish the multiple computations
280 "baselabel" can be passed to help distinguish the multiple computations
281 done in the graft case.
281 done in the graft case.
282 """
282 """
283 u1 = sorted(addedinm1 - addedinm2)
283 u1 = sorted(addedinm1 - addedinm2)
284 u2 = sorted(addedinm2 - addedinm1)
284 u2 = sorted(addedinm2 - addedinm1)
285
285
286 header = " unmatched files in %s"
286 header = " unmatched files in %s"
287 if baselabel:
287 if baselabel:
288 header += ' (from %s)' % baselabel
288 header += ' (from %s)' % baselabel
289 if u1:
289 if u1:
290 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
290 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
291 if u2:
291 if u2:
292 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
292 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
293
293
294 return u1, u2
294 return u1, u2
295
295
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        # compute the ancestry set once and cache it on ctx so every file
        # traced from this changeset shares it
        revs = [rev]
        if rev is None:
            # working context: use its parents instead
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if n in node.wdirfilenodeids: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    # memoize: the same (f, n) pair is looked up repeatedly during tracing
    return util.lrucachefunc(makectx)
330
330
331 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
331 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
332 """combine partial copy paths"""
332 """combine partial copy paths"""
333 remainder = {}
333 remainder = {}
334 for f in copyfrom:
334 for f in copyfrom:
335 if f in copyto:
335 if f in copyto:
336 finalcopy[copyto[f]] = copyfrom[f]
336 finalcopy[copyto[f]] = copyfrom[f]
337 del copyto[f]
337 del copyto[f]
338 for f in incompletediverge:
338 for f in incompletediverge:
339 assert f not in diverge
339 assert f not in diverge
340 ic = incompletediverge[f]
340 ic = incompletediverge[f]
341 if ic[0] in copyto:
341 if ic[0] in copyto:
342 diverge[f] = [copyto[ic[0]], ic[1]]
342 diverge[f] = [copyto[ic[0]], ic[1]]
343 else:
343 else:
344 remainder[f] = ic
344 remainder[f] = ic
345 return remainder
345 return remainder
346
346
def mergecopies(repo, c1, c2, base):
    """
    The function calling different copytracing algorithms on the basis of config
    which find moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')
    boolctrace = stringutil.parsebool(copytracing)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    elif boolctrace is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}
    else:
        return _fullcopytracing(repo, c1, c2, base)
419
419
420 def _isfullcopytraceable(repo, c1, base):
420 def _isfullcopytraceable(repo, c1, base):
421 """ Checks that if base, source and destination are all no-public branches,
421 """ Checks that if base, source and destination are all no-public branches,
422 if yes let's use the full copytrace algorithm for increased capabilities
422 if yes let's use the full copytrace algorithm for increased capabilities
423 since it will be fast enough.
423 since it will be fast enough.
424
424
425 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
425 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
426 number of changesets from c1 to base such that if number of changesets are
426 number of changesets from c1 to base such that if number of changesets are
427 more than the limit, full copytracing algorithm won't be used.
427 more than the limit, full copytracing algorithm won't be used.
428 """
428 """
429 if c1.rev() is None:
429 if c1.rev() is None:
430 c1 = c1.p1()
430 c1 = c1.p1()
431 if c1.mutable() and base.mutable():
431 if c1.mutable() and base.mutable():
432 sourcecommitlimit = repo.ui.configint('experimental',
432 sourcecommitlimit = repo.ui.configint('experimental',
433 'copytrace.sourcecommitlimit')
433 'copytrace.sourcecommitlimit')
434 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
434 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
435 return commits < sourcecommitlimit
435 return commits < sourcecommitlimit
436 return False
436 return False
437
437
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple of dicts: (copy, movewithdir, diverge, renamedelete,
    dirmove).  All five are empty when c1 and c2 have no common ancestor.
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden We still need to know a real common ancestor in this case We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.isancestorof(wc) is False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not base.isancestorof(_c1)
    dirtyc2 = not base.isancestorof(_c2)
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}, {}
    repo.ui.debug("  searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
                                      m2.filesnotin(mta),
                                      baselabel='topological common ancestor')

    # trace copies for the files unmatched on each side, each direction
    # accumulating into its own data dict (diverge data is shared)
    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    # merge the copy data found from both directions
    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    else:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    # materialize items() with list() since entries are deleted mid-iteration
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug("  unmatched files new in both:\n   %s\n"
                      % "\n   ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                              note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug("  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so prefix matching below is exact
    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(("   pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
671
671
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same 5-tuple of dicts as _fullcopytracing(); only the first
    element (copies) is ever populated by the heuristic path itself.
    """

    # working contexts have no revision number; trace from their parent instead
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base and c2 (exclusive of base)
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only the copies whose source still exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index files added on the c1 side by basename and by dirname so a
        # missing file can be matched against rename candidates cheaply
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (loop-invariant: look it up once instead of once per missing file)
        maxcandidates = repo.ui.configint('experimental',
                                          'copytrace.movecandidateslimit')

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
772
772
773 def _related(f1, f2):
773 def _related(f1, f2):
774 """return True if f1 and f2 filectx have a common ancestor
774 """return True if f1 and f2 filectx have a common ancestor
775
775
776 Walk back to common ancestor to see if the two files originate
776 Walk back to common ancestor to see if the two files originate
777 from the same file. Since workingfilectx's rev() is None it messes
777 from the same file. Since workingfilectx's rev() is None it messes
778 up the integer comparison logic, hence the pre-step check for
778 up the integer comparison logic, hence the pre-step check for
779 None (f1 and f2 can only be workingfilectx's initially).
779 None (f1 and f2 can only be workingfilectx's initially).
780 """
780 """
781
781
782 if f1 == f2:
782 if f1 == f2:
783 return f1 # a match
783 return f1 # a match
784
784
785 g1, g2 = f1.ancestors(), f2.ancestors()
785 g1, g2 = f1.ancestors(), f2.ancestors()
786 try:
786 try:
787 f1r, f2r = f1.linkrev(), f2.linkrev()
787 f1r, f2r = f1.linkrev(), f2.linkrev()
788
788
789 if f1r is None:
789 if f1r is None:
790 f1 = next(g1)
790 f1 = next(g1)
791 if f2r is None:
791 if f2r is None:
792 f2 = next(g2)
792 f2 = next(g2)
793
793
794 while True:
794 while True:
795 f1r, f2r = f1.linkrev(), f2.linkrev()
795 f1r, f2r = f1.linkrev(), f2.linkrev()
796 if f1r > f2r:
796 if f1r > f2r:
797 f1 = next(g1)
797 f1 = next(g1)
798 elif f2r > f1r:
798 elif f2r > f1r:
799 f2 = next(g2)
799 f2 = next(g2)
800 else: # f1 and f2 point to files in the same linkrev
800 else: # f1 and f2 point to files in the same linkrev
801 return f1 == f2 # true if they point to the same file
801 return f1 == f2 # true if they point to the same file
802 except StopIteration:
802 except StopIteration:
803 return False
803 return False
804
804
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    Results are accumulated by mutating the sub-dicts of *data* in place
    ('copy', 'fullcopy', 'incomplete', 'diverge', 'incompletediverge');
    nothing is returned.

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    seen = {f}  # every path name f has been known by along its history
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if oc.linkrev() < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

        if of in mta:
            # the rename chain reached the topological CA without resolving:
            # record it as a partial (incomplete) copy or divergence for the
            # caller to combine with the other side's results
            if backwards or remotebase:
                data['incomplete'][of] = f
            else:
                for sf in seen:
                    if sf in mb:
                        if tca == base:
                            data['diverge'].setdefault(sf, []).append(f)
                        else:
                            data['incompletediverge'][sf] = [of, f]
                        return
897
897
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    skipped = {}
    cfg = repo.ui.config('experimental', 'copytrace')
    enabled = stringutil.parsebool(cfg)
    # copytracing is on unless the config is an explicit boolean "off"
    tracing = cfg == 'heuristics' or enabled or enabled is None
    if skiprev is not None and tracing:
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        skipped = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        if dst not in skipped:
            # copies.pathcopies returns backward renames, so dst might not
            # actually be in the dirstate
            wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now