upstream/mercurial-mirror Commit - r41873:35158796

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import collections

10

import collections

11

import heapq

11

import heapq

12

import os

12

import os

13

14

from .i18n import _

14

from .i18n import _

15

16

from . import (

16

from . import (

17

match as matchmod,

17

match as matchmod,

18

node,

18

node,

19

pathutil,

19

pathutil,

20

scmutil,

20

scmutil,

21

util,

21

util,

22

)

22

)

23

from .utils import (

23

from .utils import (

24

stringutil,

24

stringutil,

25

)

25

)

26

27

def _findlimit(repo, ctxa, ctxb):

27

def _findlimit(repo, ctxa, ctxb):

28

"""

28

"""

29

Find the last revision that needs to be checked to ensure that a full

29

Find the last revision that needs to be checked to ensure that a full

30

transitive closure for file copies can be properly calculated.

30

transitive closure for file copies can be properly calculated.

31

Generally, this means finding the earliest revision number that's an

31

Generally, this means finding the earliest revision number that's an

32

ancestor of a or b but not both, except when a or b is a direct descendent

32

ancestor of a or b but not both, except when a or b is a direct descendent

33

of the other, in which case we can return the minimum revnum of a and b.

33

of the other, in which case we can return the minimum revnum of a and b.

34

"""

34

"""

35

36

# basic idea:

36

# basic idea:

37

# - mark a and b with different sides

37

# - mark a and b with different sides

38

# - if a parent's children are all on the same side, the parent is

38

# - if a parent's children are all on the same side, the parent is

39

# on that side, otherwise it is on no side

39

# on that side, otherwise it is on no side

40

# - walk the graph in topological order with the help of a heap;

40

# - walk the graph in topological order with the help of a heap;

41

# - add unseen parents to side map

41

# - add unseen parents to side map

42

# - clear side of any parent that has children on different sides

42

# - clear side of any parent that has children on different sides

43

# - track number of interesting revs that might still be on a side

43

# - track number of interesting revs that might still be on a side

44

# - track the lowest interesting rev seen

44

# - track the lowest interesting rev seen

45

# - quit when interesting revs is zero

45

# - quit when interesting revs is zero

46

47

cl = repo.changelog

47

cl = repo.changelog

48

wdirparents = None

48

wdirparents = None

49

a = ctxa.rev()

49

a = ctxa.rev()

50

b = ctxb.rev()

50

b = ctxb.rev()

51

if a is None:

51

if a is None:

52

wdirparents = (ctxa.p1(), ctxa.p2())

52

wdirparents = (ctxa.p1(), ctxa.p2())

53

a = node.wdirrev

53

a = node.wdirrev

54

if b is None:

54

if b is None:

55

assert not wdirparents

55

assert not wdirparents

56

wdirparents = (ctxb.p1(), ctxb.p2())

56

wdirparents = (ctxb.p1(), ctxb.p2())

57

b = node.wdirrev

57

b = node.wdirrev

58

59

side = {a: -1, b: 1}

59

side = {a: -1, b: 1}

60

visit = [-a, -b]

60

visit = [-a, -b]

61

heapq.heapify(visit)

61

heapq.heapify(visit)

62

interesting = len(visit)

62

interesting = len(visit)

63

limit = node.wdirrev

63

limit = node.wdirrev

64

65

while interesting:

65

while interesting:

66

r = -heapq.heappop(visit)

66

r = -heapq.heappop(visit)

67

if r == node.wdirrev:

67

if r == node.wdirrev:

68

parents = [pctx.rev() for pctx in wdirparents]

68

parents = [pctx.rev() for pctx in wdirparents]

69

else:

69

else:

70

parents = cl.parentrevs(r)

70

parents = cl.parentrevs(r)

71

if parents[1] == node.nullrev:

71

if parents[1] == node.nullrev:

72

parents = parents[:1]

72

parents = parents[:1]

73

for p in parents:

73

for p in parents:

74

if p not in side:

74

if p not in side:

75

# first time we see p; add it to visit

75

# first time we see p; add it to visit

76

side[p] = side[r]

76

side[p] = side[r]

77

if side[p]:

77

if side[p]:

78

interesting += 1

78

interesting += 1

79

heapq.heappush(visit, -p)

79

heapq.heappush(visit, -p)

80

elif side[p] and side[p] != side[r]:

80

elif side[p] and side[p] != side[r]:

81

# p was interesting but now we know better

81

# p was interesting but now we know better

82

side[p] = 0

82

side[p] = 0

83

interesting -= 1

83

interesting -= 1

84

if side[r]:

84

if side[r]:

85

limit = r # lowest rev visited

85

limit = r # lowest rev visited

86

interesting -= 1

86

interesting -= 1

87

88

# Consider the following flow (see test-commit-amend.t under issue4405):

88

# Consider the following flow (see test-commit-amend.t under issue4405):

89

# 1/ File 'a0' committed

89

# 1/ File 'a0' committed

90

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

90

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

91

# 3/ Move back to first commit

91

# 3/ Move back to first commit

92

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

92

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

93

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

93

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

94

#

94

#

95

# During the amend in step five, we will be in this state:

95

# During the amend in step five, we will be in this state:

96

#

96

#

97

# @ 3 temporary amend commit for a1-amend

97

# @ 3 temporary amend commit for a1-amend

98

# |

98

# |

99

# o 2 a1-amend

99

# o 2 a1-amend

100

# |

100

# |

101

# | o 1 a1

101

# | o 1 a1

102

# |/

102

# |/

103

# o 0 a0

103

# o 0 a0

104

#

104

#

105

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

105

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

106

# yet the filelog has the copy information in rev 1 and we will not look

106

# yet the filelog has the copy information in rev 1 and we will not look

107

# back far enough unless we also look at the a and b as candidates.

107

# back far enough unless we also look at the a and b as candidates.

108

# This only occurs when a is a descendent of b or visa-versa.

108

# This only occurs when a is a descendent of b or visa-versa.

109

return min(limit, a, b)

109

return min(limit, a, b)

110

111

def _chain(src, dst, a, b):

111

def _chain(src, dst, a, b):

112

"""chain two sets of copies a->b"""

112

"""chain two sets of copies a->b"""

113

t = a.copy()

113

t = a.copy()

114

for k, v in b.iteritems():

114

for k, v in b.iteritems():

115

if v in t:

115

if v in t:

116

# found a chain

116

# found a chain

117

if t[v] != k:

117

if t[v] != k:

118

# file wasn't renamed back to itself

118

# file wasn't renamed back to itself

119

t[k] = t[v]

119

t[k] = t[v]

120

if v not in dst:

120

if v not in dst:

121

# chain was a rename, not a copy

121

# chain was a rename, not a copy

122

del t[v]

122

del t[v]

123

if v in src:

123

if v in src:

124

# file is a copy of an existing file

124

# file is a copy of an existing file

125

t[k] = v

125

t[k] = v

126

127

# remove criss-crossed copies

127

# remove criss-crossed copies

128

for k, v in list(t.items()):

128

for k, v in list(t.items()):

129

if k in src and v in dst:

129

if k in src and v in dst:

130

del t[k]

130

del t[k]

131

132

return t

132

return t

133

134

def _tracefile(fctx, am, limit=node.nullrev):

134

def _tracefile(fctx, am, limit=node.nullrev):

135

"""return file context that is the ancestor of fctx present in ancestor

135

"""return file context that is the ancestor of fctx present in ancestor

136

manifest am, stopping after the first ancestor lower than limit"""

136

manifest am, stopping after the first ancestor lower than limit"""

137

138

for f in fctx.ancestors():

138

for f in fctx.ancestors():

139

if am.get(f.path(), None) == f.filenode():

139

if am.get(f.path(), None) == f.filenode():

140

return f

140

return f

141

if limit >= 0 and not f.isintroducedafter(limit):

141

if limit >= 0 and not f.isintroducedafter(limit):

142

return None

142

return None

143

144

def _dirstatecopies(d, match=None):

144

def _dirstatecopies(d, match=None):

145

ds = d._repo.dirstate

145

ds = d._repo.dirstate

146

c = ds.copies().copy()

146

c = ds.copies().copy()

147

for k in list(c):

147

for k in list(c):

148

if ds[k] not in 'anm' or (match and not match(k)):

148

if ds[k] not in 'anm' or (match and not match(k)):

149

del c[k]

149

del c[k]

150

return c

150

return c

151

152

def _computeforwardmissing(a, b, match=None):

152

def _computeforwardmissing(a, b, match=None):

153

"""Computes which files are in b but not a.

153

"""Computes which files are in b but not a.

154

This is its own function so extensions can easily wrap this call to see what

154

This is its own function so extensions can easily wrap this call to see what

155

files _forwardcopies is about to process.

155

files _forwardcopies is about to process.

156

"""

156

"""

157

ma = a.manifest()

157

ma = a.manifest()

158

mb = b.manifest()

158

mb = b.manifest()

159

return mb.filesnotin(ma, match=match)

159

return mb.filesnotin(ma, match=match)

160

161

def _committedforwardcopies(a, b, match):

161

def _committedforwardcopies(a, b, match):

162

"""Like _forwardcopies(), but b.rev() cannot be None (working copy)"""

162

"""Like _forwardcopies(), but b.rev() cannot be None (working copy)"""

163

# files might have to be traced back to the fctx parent of the last

163

# files might have to be traced back to the fctx parent of the last

164

# one-side-only changeset, but not further back than that

164

# one-side-only changeset, but not further back than that

165

repo = a._repo

165

repo = a._repo

166

debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')

166

debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')

167

dbg = repo.ui.debug

167

dbg = repo.ui.debug

168

if debug:

168

if debug:

169

dbg('debug.copies: looking into rename from %s to %s\n'

169

dbg('debug.copies: looking into rename from %s to %s\n'

170

% (a, b))

170

% (a, b))

171

limit = _findlimit(repo, a, b)

171

limit = _findlimit(repo, a, b)

172

if debug:

172

if debug:

173

dbg('debug.copies: search limit: %d\n' % limit)

173

dbg('debug.copies: search limit: %d\n' % limit)

174

am = a.manifest()

174

am = a.manifest()

175

176

# find where new files came from

176

# find where new files came from

177

# we currently don't try to find where old files went, too expensive

177

# we currently don't try to find where old files went, too expensive

178

# this means we can miss a case like 'hg rm b; hg cp a b'

178

# this means we can miss a case like 'hg rm b; hg cp a b'

179

cm = {}

179

cm = {}

180

181

# Computing the forward missing is quite expensive on large manifests, since

181

# Computing the forward missing is quite expensive on large manifests, since

182

# it compares the entire manifests. We can optimize it in the common use

182

# it compares the entire manifests. We can optimize it in the common use

183

# case of computing what copies are in a commit versus its parent (like

183

# case of computing what copies are in a commit versus its parent (like

184

# during a rebase or histedit). Note, we exclude merge commits from this

184

# during a rebase or histedit). Note, we exclude merge commits from this

185

# optimization, since the ctx.files() for a merge commit is not correct for

185

# optimization, since the ctx.files() for a merge commit is not correct for

186

# this comparison.

186

# this comparison.

187

forwardmissingmatch = match

187

forwardmissingmatch = match

188

if b.p1() == a and b.p2().node() == node.nullid:

188

if b.p1() == a and b.p2().node() == node.nullid:

189

filesmatcher = scmutil.matchfiles(a._repo, b.files())

189

filesmatcher = scmutil.matchfiles(a._repo, b.files())

190

forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)

190

forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)

191

missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

191

missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

192

193

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

193

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

194

195

if debug:

195

if debug:

196

dbg('debug.copies: missing file to search: %d\n' % len(missing))

196

dbg('debug.copies: missing file to search: %d\n' % len(missing))

197

198

for f in missing:

198

for f in missing:

199

if debug:

199

if debug:

200

dbg('debug.copies: tracing file: %s\n' % f)

200

dbg('debug.copies: tracing file: %s\n' % f)

201

fctx = b[f]

201

fctx = b[f]

202

fctx._ancestrycontext = ancestrycontext

202

fctx._ancestrycontext = ancestrycontext

203

204

if debug:

204

if debug:

205

start = util.timer()

205

start = util.timer()

206

ofctx = _tracefile(fctx, am, limit)

206

ofctx = _tracefile(fctx, am, limit)

207

if ofctx:

207

if ofctx:

208

if debug:

208

if debug:

209

dbg('debug.copies: rename of: %s\n' % ofctx._path)

209

dbg('debug.copies: rename of: %s\n' % ofctx._path)

210

cm[f] = ofctx.path()

210

cm[f] = ofctx.path()

211

if debug:

211

if debug:

212

dbg('debug.copies: time: %f seconds\n'

212

dbg('debug.copies: time: %f seconds\n'

213

% (util.timer() - start))

213

% (util.timer() - start))

214

return cm

214

return cm

215

216

def _forwardcopies(a, b, match=None):

216

def _forwardcopies(a, b, match=None):

217

"""find {dst@b: src@a} copy mapping where a is an ancestor of b"""

217

"""find {dst@b: src@a} copy mapping where a is an ancestor of b"""

218

219

match = a.repo().narrowmatch(match)

219

match = a.repo().narrowmatch(match)

220

# check for working copy

220

# check for working copy

221

if b.rev() is None:

221

if b.rev() is None:

222

if a == b.p1():

222

if a == b.p1():

223

# short-circuit to avoid issues with merge states

223

# short-circuit to avoid issues with merge states

224

return _dirstatecopies(b, match)

224

return _dirstatecopies(b, match)

225

226

cm = _committedforwardcopies(a, b.p1(), match)

226

cm = _committedforwardcopies(a, b.p1(), match)

227

# combine copies from dirstate if necessary

227

# combine copies from dirstate if necessary

228

return _chain(a, b, cm, _dirstatecopies(b, match))

228

return _chain(a, b, cm, _dirstatecopies(b, match))

229

return _committedforwardcopies(a, b, match)

229

return _committedforwardcopies(a, b, match)

230

231

def _backwardrenames(a, b):

231

def _backwardrenames(a, b):

232

if a._repo.ui.config('experimental', 'copytrace') == 'off':

232

if a._repo.ui.config('experimental', 'copytrace') == 'off':

233

return {}

233

return {}

234

235

# Even though we're not taking copies into account, 1:n rename situations

235

# Even though we're not taking copies into account, 1:n rename situations

236

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

236

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

237

# arbitrarily pick one of the renames.

237

# arbitrarily pick one of the renames.

238

f = _forwardcopies(b, a)

238

f = _forwardcopies(b, a)

239

r = {}

239

r = {}

240

for k, v in sorted(f.iteritems()):

240

for k, v in sorted(f.iteritems()):

241

# remove copies

241

# remove copies

242

if v in a:

242

if v in a:

243

continue

243

continue

244

r[v] = k

244

r[v] = k

245

return r

245

return r

246

247

def pathcopies(x, y, match=None):

247

def pathcopies(x, y, match=None):

248

"""find {dst@y: src@x} copy mapping for directed compare"""

248

"""find {dst@y: src@x} copy mapping for directed compare"""

249

repo = x._repo

249

repo = x._repo

250

debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')

250

debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')

251

if debug:

251

if debug:

252

repo.ui.debug('debug.copies: searching copies from %s to %s\n'

252

repo.ui.debug('debug.copies: searching copies from %s to %s\n'

253

% (x, y))

253

% (x, y))

254

if x == y or not x or not y:

254

if x == y or not x or not y:

255

return {}

255

return {}

256

a = y.ancestor(x)

256

a = y.ancestor(x)

257

if a == x:

257

if a == x:

258

if debug:

258

if debug:

259

repo.ui.debug('debug.copies: search mode: forward\n')

259

repo.ui.debug('debug.copies: search mode: forward\n')

260

return _forwardcopies(x, y, match=match)

260

return _forwardcopies(x, y, match=match)

261

if a == y:

261

if a == y:

262

if debug:

262

if debug:

263

repo.ui.debug('debug.copies: search mode: backward\n')

263

repo.ui.debug('debug.copies: search mode: backward\n')

264

return _backwardrenames(x, y)

264

return _backwardrenames(x, y)

265

if debug:

265

if debug:

266

repo.ui.debug('debug.copies: search mode: combined\n')

266

repo.ui.debug('debug.copies: search mode: combined\n')

267

return _chain(x, y, _backwardrenames(x, a),

267

return _chain(x, y, _backwardrenames(x, a),

268

_forwardcopies(a, y, match=match))

268

_forwardcopies(a, y, match=match))

269

270

def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):

270

def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):

271

"""Computes, based on addedinm1 and addedinm2, the files exclusive to c1

271

"""Computes, based on addedinm1 and addedinm2, the files exclusive to c1

272

and c2. This is its own function so extensions can easily wrap this call

272

and c2. This is its own function so extensions can easily wrap this call

273

to see what files mergecopies is about to process.

273

to see what files mergecopies is about to process.

274

275

Even though c1 and c2 are not used in this function, they are useful in

275

Even though c1 and c2 are not used in this function, they are useful in

276

other extensions for being able to read the file nodes of the changed files.

276

other extensions for being able to read the file nodes of the changed files.

277

278

"baselabel" can be passed to help distinguish the multiple computations

278

"baselabel" can be passed to help distinguish the multiple computations

279

done in the graft case.

279

done in the graft case.

280

"""

280

"""

281

u1 = sorted(addedinm1 - addedinm2)

281

u1 = sorted(addedinm1 - addedinm2)

282

u2 = sorted(addedinm2 - addedinm1)

282

u2 = sorted(addedinm2 - addedinm1)

283

284

header = " unmatched files in %s"

284

header = " unmatched files in %s"

285

if baselabel:

285

if baselabel:

286

header += ' (from %s)' % baselabel

286

header += ' (from %s)' % baselabel

287

if u1:

287

if u1:

288

repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))

288

repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))

289

if u2:

289

if u2:

290

repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))

290

repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))

291

292

return u1, u2

292

return u1, u2

293

294

def _makegetfctx(ctx):

294

def _makegetfctx(ctx):

295

"""return a 'getfctx' function suitable for _checkcopies usage

295

"""return a 'getfctx' function suitable for _checkcopies usage

296

297

We have to re-setup the function building 'filectx' for each

297

We have to re-setup the function building 'filectx' for each

298

'_checkcopies' to ensure the linkrev adjustment is properly setup for

298

'_checkcopies' to ensure the linkrev adjustment is properly setup for

299

each. Linkrev adjustment is important to avoid bug in rename

299

each. Linkrev adjustment is important to avoid bug in rename

300

detection. Moreover, having a proper '_ancestrycontext' setup ensures

300

detection. Moreover, having a proper '_ancestrycontext' setup ensures

301

the performance impact of this adjustment is kept limited. Without it,

301

the performance impact of this adjustment is kept limited. Without it,

302

each file could do a full dag traversal making the time complexity of

302

each file could do a full dag traversal making the time complexity of

303

the operation explode (see issue4537).

303

the operation explode (see issue4537).

304

305

This function exists here mostly to limit the impact on stable. Feel

305

This function exists here mostly to limit the impact on stable. Feel

306

free to refactor on default.

306

free to refactor on default.

307

"""

307

"""

308

rev = ctx.rev()

308

rev = ctx.rev()

309

repo = ctx._repo

309

repo = ctx._repo

310

ac = getattr(ctx, '_ancestrycontext', None)

310

ac = getattr(ctx, '_ancestrycontext', None)

311

if ac is None:

311

if ac is None:

312

revs = [rev]

312

revs = [rev]

313

if rev is None:

313

if rev is None:

314

revs = [p.rev() for p in ctx.parents()]

314

revs = [p.rev() for p in ctx.parents()]

315

ac = repo.changelog.ancestors(revs, inclusive=True)

315

ac = repo.changelog.ancestors(revs, inclusive=True)

316

ctx._ancestrycontext = ac

316

ctx._ancestrycontext = ac

317

def makectx(f, n):

317

def makectx(f, n):

318

if n in node.wdirfilenodeids: # in a working context?

318

if n in node.wdirfilenodeids: # in a working context?

319

if ctx.rev() is None:

319

if ctx.rev() is None:

320

return ctx.filectx(f)

320

return ctx.filectx(f)

321

return repo[None][f]

321

return repo[None][f]

322

fctx = repo.filectx(f, fileid=n)

322

fctx = repo.filectx(f, fileid=n)

323

# setup only needed for filectx not create from a changectx

323

# setup only needed for filectx not create from a changectx

324

fctx._ancestrycontext = ac

324

fctx._ancestrycontext = ac

325

fctx._descendantrev = rev

325

fctx._descendantrev = rev

326

return fctx

326

return fctx

327

return util.lrucachefunc(makectx)

327

return util.lrucachefunc(makectx)

328

329

def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):

329

def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):

330

"""combine partial copy paths"""

330

"""combine partial copy paths"""

331

remainder = {}

331

remainder = {}

332

for f in copyfrom:

332

for f in copyfrom:

333

if f in copyto:

333

if f in copyto:

334

finalcopy[copyto[f]] = copyfrom[f]

334

finalcopy[copyto[f]] = copyfrom[f]

335

del copyto[f]

335

del copyto[f]

336

for f in incompletediverge:

336

for f in incompletediverge:

337

assert f not in diverge

337

assert f not in diverge

338

ic = incompletediverge[f]

338

ic = incompletediverge[f]

339

if ic[0] in copyto:

339

if ic[0] in copyto:

340

diverge[f] = [copyto[ic[0]], ic[1]]

340

diverge[f] = [copyto[ic[0]], ic[1]]

341

else:

341

else:

342

remainder[f] = ic

342

remainder[f] = ic

343

return remainder

343

return remainder

344

345

def mergecopies(repo, c1, c2, base):

345

def mergecopies(repo, c1, c2, base):

346

"""

346

"""

347

The function calling different copytracing algorithms on the basis of config

347

The function calling different copytracing algorithms on the basis of config

348

which find moves and copies between context c1 and c2 that are relevant for

348

which find moves and copies between context c1 and c2 that are relevant for

349

merging. 'base' will be used as the merge base.

349

merging. 'base' will be used as the merge base.

350

351

Copytracing is used in commands like rebase, merge, unshelve, etc to merge

351

Copytracing is used in commands like rebase, merge, unshelve, etc to merge

352

files that were moved/ copied in one merge parent and modified in another.

352

files that were moved/ copied in one merge parent and modified in another.

353

For example:

353

For example:

354

355

o ---> 4 another commit

355

o ---> 4 another commit

356

|

356

|

357

| o ---> 3 commit that modifies a.txt

357

| o ---> 3 commit that modifies a.txt

358

| /

358

| /

359

o / ---> 2 commit that moves a.txt to b.txt

359

o / ---> 2 commit that moves a.txt to b.txt

360

|/

360

|/

361

o ---> 1 merge base

361

o ---> 1 merge base

362

363

If we try to rebase revision 3 on revision 4, since there is no a.txt in

363

If we try to rebase revision 3 on revision 4, since there is no a.txt in

364

revision 4, and if user have copytrace disabled, we prints the following

364

revision 4, and if user have copytrace disabled, we prints the following

365

message:

365

message:

366

367

```other changed <file> which local deleted```

367

```other changed <file> which local deleted```

368

369

Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and

369

Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and

370

"dirmove".

370

"dirmove".

371

372

"copy" is a mapping from destination name -> source name,

372

"copy" is a mapping from destination name -> source name,

373

where source is in c1 and destination is in c2 or vice-versa.

373

where source is in c1 and destination is in c2 or vice-versa.

374

375

"movewithdir" is a mapping from source name -> destination name,

375

"movewithdir" is a mapping from source name -> destination name,

376

where the file at source present in one context but not the other

376

where the file at source present in one context but not the other

377

needs to be moved to destination by the merge process, because the

377

needs to be moved to destination by the merge process, because the

378

other context moved the directory it is in.

378

other context moved the directory it is in.

379

380

"diverge" is a mapping of source name -> list of destination names

380

"diverge" is a mapping of source name -> list of destination names

381

for divergent renames.

381

for divergent renames.

382

383

"renamedelete" is a mapping of source name -> list of destination

383

"renamedelete" is a mapping of source name -> list of destination

384

names for files deleted in c1 that were renamed in c2 or vice-versa.

384

names for files deleted in c1 that were renamed in c2 or vice-versa.

385

386

"dirmove" is a mapping of detected source dir -> destination dir renames.

386

"dirmove" is a mapping of detected source dir -> destination dir renames.

387

This is needed for handling changes to new files previously grafted into

387

This is needed for handling changes to new files previously grafted into

388

renamed directories.

388

renamed directories.

389

"""

389

"""

390

# avoid silly behavior for update from empty dir

390

# avoid silly behavior for update from empty dir

391

if not c1 or not c2 or c1 == c2:

391

if not c1 or not c2 or c1 == c2:

392

return {}, {}, {}, {}, {}

392

return {}, {}, {}, {}, {}

393

394

# avoid silly behavior for parent -> working dir

394

# avoid silly behavior for parent -> working dir

395

if c2.node() is None and c1.node() == repo.dirstate.p1():

395

if c2.node() is None and c1.node() == repo.dirstate.p1():

396

return repo.dirstate.copies(), {}, {}, {}, {}

396

return repo.dirstate.copies(), {}, {}, {}, {}

397

398

copytracing = repo.ui.config('experimental', 'copytrace')

398

copytracing = repo.ui.config('experimental', 'copytrace')

399

boolctrace = stringutil.parsebool(copytracing)

399

boolctrace = stringutil.parsebool(copytracing)

400

401

# Copy trace disabling is explicitly below the node == p1 logic above

401

# Copy trace disabling is explicitly below the node == p1 logic above

402

# because the logic above is required for a simple copy to be kept across a

402

# because the logic above is required for a simple copy to be kept across a

403

# rebase.

403

# rebase.

404

if copytracing == 'heuristics':

404

if copytracing == 'heuristics':

405

# Do full copytracing if only non-public revisions are involved as

405

# Do full copytracing if only non-public revisions are involved as

406

# that will be fast enough and will also cover the copies which could

406

# that will be fast enough and will also cover the copies which could

407

# be missed by heuristics

407

# be missed by heuristics

408

if _isfullcopytraceable(repo, c1, base):

408

if _isfullcopytraceable(repo, c1, base):

409

return _fullcopytracing(repo, c1, c2, base)

409

return _fullcopytracing(repo, c1, c2, base)

410

return _heuristicscopytracing(repo, c1, c2, base)

410

return _heuristicscopytracing(repo, c1, c2, base)

411

elif boolctrace is False:

411

elif boolctrace is False:

412

# stringutil.parsebool() returns None when it is unable to parse the

412

# stringutil.parsebool() returns None when it is unable to parse the

413

# value, so we should rely on making sure copytracing is on such cases

413

# value, so we should rely on making sure copytracing is on such cases

414

return {}, {}, {}, {}, {}

414

return {}, {}, {}, {}, {}

415

else:

415

else:

416

return _fullcopytracing(repo, c1, c2, base)

416

return _fullcopytracing(repo, c1, c2, base)

417

418

def _isfullcopytraceable(repo, c1, base):

418

def _isfullcopytraceable(repo, c1, base):

419

""" Checks that if base, source and destination are all no-public branches,

419

""" Checks that if base, source and destination are all no-public branches,

420

if yes let's use the full copytrace algorithm for increased capabilities

420

if yes let's use the full copytrace algorithm for increased capabilities

421

since it will be fast enough.

421

since it will be fast enough.

422

423

`experimental.copytrace.sourcecommitlimit` can be used to set a limit for

423

`experimental.copytrace.sourcecommitlimit` can be used to set a limit for

424

number of changesets from c1 to base such that if number of changesets are

424

number of changesets from c1 to base such that if number of changesets are

425

more than the limit, full copytracing algorithm won't be used.

425

more than the limit, full copytracing algorithm won't be used.

426

"""

426

"""

427

if c1.rev() is None:

427

if c1.rev() is None:

428

c1 = c1.p1()

428

c1 = c1.p1()

429

if c1.mutable() and base.mutable():

429

if c1.mutable() and base.mutable():

430

sourcecommitlimit = repo.ui.configint('experimental',

430

sourcecommitlimit = repo.ui.configint('experimental',

431

'copytrace.sourcecommitlimit')

431

'copytrace.sourcecommitlimit')

432

commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))

432

commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))

433

return commits < sourcecommitlimit

433

return commits < sourcecommitlimit

434

return False

434

return False

435

436

def _fullcopytracing(repo, c1, c2, base):

436

def _fullcopytracing(repo, c1, c2, base):

437

""" The full copytracing algorithm which finds all the new files that were

437

""" The full copytracing algorithm which finds all the new files that were

438

added from merge base up to the top commit and for each file it checks if

438

added from merge base up to the top commit and for each file it checks if

439

this file was copied from another file.

439

this file was copied from another file.

440

441

This is pretty slow when a lot of changesets are involved but will track all

441

This is pretty slow when a lot of changesets are involved but will track all

442

the copies.

442

the copies.

443

"""

443

"""

444

# In certain scenarios (e.g. graft, update or rebase), base can be

444

# In certain scenarios (e.g. graft, update or rebase), base can be

445

# overridden We still need to know a real common ancestor in this case We

445

# overridden We still need to know a real common ancestor in this case We

446

# can't just compute _c1.ancestor(_c2) and compare it to ca, because there

446

# can't just compute _c1.ancestor(_c2) and compare it to ca, because there

447

# can be multiple common ancestors, e.g. in case of bidmerge. Because our

447

# can be multiple common ancestors, e.g. in case of bidmerge. Because our

448

# caller may not know if the revision passed in lieu of the CA is a genuine

448

# caller may not know if the revision passed in lieu of the CA is a genuine

449

# common ancestor or not without explicitly checking it, it's better to

449

# common ancestor or not without explicitly checking it, it's better to

450

# determine that here.

450

# determine that here.

451

#

451

#

452

# base.isancestorof(wc) is False, work around that

452

# base.isancestorof(wc) is False, work around that

453

_c1 = c1.p1() if c1.rev() is None else c1

453

_c1 = c1.p1() if c1.rev() is None else c1

454

_c2 = c2.p1() if c2.rev() is None else c2

454

_c2 = c2.p1() if c2.rev() is None else c2

455

# an endpoint is "dirty" if it isn't a descendant of the merge base

455

# an endpoint is "dirty" if it isn't a descendant of the merge base

456

# if we have a dirty endpoint, we need to trigger graft logic, and also

456

# if we have a dirty endpoint, we need to trigger graft logic, and also

457

# keep track of which endpoint is dirty

457

# keep track of which endpoint is dirty

458

dirtyc1 = not base.isancestorof(_c1)

458

dirtyc1 = not base.isancestorof(_c1)

459

dirtyc2 = not base.isancestorof(_c2)

459

dirtyc2 = not base.isancestorof(_c2)

460

graft = dirtyc1 or dirtyc2

460

graft = dirtyc1 or dirtyc2

461

tca = base

461

tca = base

462

if graft:

462

if graft:

463

tca = _c1.ancestor(_c2)

463

tca = _c1.ancestor(_c2)

464

465

limit = _findlimit(repo, c1, c2)

465

limit = _findlimit(repo, c1, c2)

466

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

466

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

467

468

m1 = c1.manifest()

468

m1 = c1.manifest()

469

m2 = c2.manifest()

469

m2 = c2.manifest()

470

mb = base.manifest()

470

mb = base.manifest()

471

472

# gather data from _checkcopies:

472

# gather data from _checkcopies:

473

# - diverge = record all diverges in this dict

473

# - diverge = record all diverges in this dict

474

# - copy = record all non-divergent copies in this dict

474

# - copy = record all non-divergent copies in this dict

475

# - fullcopy = record all copies in this dict

475

# - fullcopy = record all copies in this dict

476

# - incomplete = record non-divergent partial copies here

476

# - incomplete = record non-divergent partial copies here

477

# - incompletediverge = record divergent partial copies here

477

# - incompletediverge = record divergent partial copies here

478

diverge = {} # divergence data is shared

478

diverge = {} # divergence data is shared

479

incompletediverge = {}

479

incompletediverge = {}

480

data1 = {'copy': {},

480

data1 = {'copy': {},

481

'fullcopy': {},

481

'fullcopy': {},

482

'incomplete': {},

482

'incomplete': {},

483

'diverge': diverge,

483

'diverge': diverge,

484

'incompletediverge': incompletediverge,

484

'incompletediverge': incompletediverge,

485

}

485

}

486

data2 = {'copy': {},

486

data2 = {'copy': {},

487

'fullcopy': {},

487

'fullcopy': {},

488

'incomplete': {},

488

'incomplete': {},

489

'diverge': diverge,

489

'diverge': diverge,

490

'incompletediverge': incompletediverge,

490

'incompletediverge': incompletediverge,

491

}

491

}

492

493

# find interesting file sets from manifests

493

# find interesting file sets from manifests

494

addedinm1 = m1.filesnotin(mb, repo.narrowmatch())

494

addedinm1 = m1.filesnotin(mb, repo.narrowmatch())

495

addedinm2 = m2.filesnotin(mb, repo.narrowmatch())

495

addedinm2 = m2.filesnotin(mb, repo.narrowmatch())

496

bothnew = sorted(addedinm1 & addedinm2)

496

bothnew = sorted(addedinm1 & addedinm2)

497

if tca == base:

497

if tca == base:

498

# unmatched file from base

498

# unmatched file from base

499

u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)

499

u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)

500

u1u, u2u = u1r, u2r

500

u1u, u2u = u1r, u2r

501

else:

501

else:

502

# unmatched file from base (DAG rotation in the graft case)

502

# unmatched file from base (DAG rotation in the graft case)

503

u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,

503

u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,

504

baselabel='base')

504

baselabel='base')

505

# unmatched file from topological common ancestors (no DAG rotation)

505

# unmatched file from topological common ancestors (no DAG rotation)

506

# need to recompute this for directory move handling when grafting

506

# need to recompute this for directory move handling when grafting

507

mta = tca.manifest()

507

mta = tca.manifest()

508

u1u, u2u = _computenonoverlap(repo, c1, c2,

508

u1u, u2u = _computenonoverlap(repo, c1, c2,

509

m1.filesnotin(mta, repo.narrowmatch()),

509

m1.filesnotin(mta, repo.narrowmatch()),

510

m2.filesnotin(mta, repo.narrowmatch()),

510

m2.filesnotin(mta, repo.narrowmatch()),

511

baselabel='topological common ancestor')

511

baselabel='topological common ancestor')

512

513

for f in u1u:

513

for f in u1u:

514

_checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

514

_checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

515

516

for f in u2u:

516

for f in u2u:

517

_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

517

_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

518

519

copy = dict(data1['copy'])

519

copy = dict(data1['copy'])

520

copy.update(data2['copy'])

520

copy.update(data2['copy'])

521

fullcopy = dict(data1['fullcopy'])

521

fullcopy = dict(data1['fullcopy'])

522

fullcopy.update(data2['fullcopy'])

522

fullcopy.update(data2['fullcopy'])

523

524

if dirtyc1:

524

if dirtyc1:

525

_combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,

525

_combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,

526

incompletediverge)

526

incompletediverge)

527

else:

527

else:

528

_combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,

528

_combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,

529

incompletediverge)

529

incompletediverge)

530

531

renamedelete = {}

531

renamedelete = {}

532

renamedeleteset = set()

532

renamedeleteset = set()

533

divergeset = set()

533

divergeset = set()

534

for of, fl in list(diverge.items()):

534

for of, fl in list(diverge.items()):

535

if len(fl) == 1 or of in c1 or of in c2:

535

if len(fl) == 1 or of in c1 or of in c2:

536

del diverge[of] # not actually divergent, or not a rename

536

del diverge[of] # not actually divergent, or not a rename

537

if of not in c1 and of not in c2:

537

if of not in c1 and of not in c2:

538

# renamed on one side, deleted on the other side, but filter

538

# renamed on one side, deleted on the other side, but filter

539

# out files that have been renamed and then deleted

539

# out files that have been renamed and then deleted

540

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

540

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

541

renamedeleteset.update(fl) # reverse map for below

541

renamedeleteset.update(fl) # reverse map for below

542

else:

542

else:

543

divergeset.update(fl) # reverse map for below

543

divergeset.update(fl) # reverse map for below

544

545

if bothnew:

545

if bothnew:

546

repo.ui.debug(" unmatched files new in both:\n %s\n"

546

repo.ui.debug(" unmatched files new in both:\n %s\n"

547

% "\n ".join(bothnew))

547

% "\n ".join(bothnew))

548

bothdiverge = {}

548

bothdiverge = {}

549

bothincompletediverge = {}

549

bothincompletediverge = {}

550

remainder = {}

550

remainder = {}

551

both1 = {'copy': {},

551

both1 = {'copy': {},

552

'fullcopy': {},

552

'fullcopy': {},

553

'incomplete': {},

553

'incomplete': {},

554

'diverge': bothdiverge,

554

'diverge': bothdiverge,

555

'incompletediverge': bothincompletediverge

555

'incompletediverge': bothincompletediverge

556

}

556

}

557

both2 = {'copy': {},

557

both2 = {'copy': {},

558

'fullcopy': {},

558

'fullcopy': {},

559

'incomplete': {},

559

'incomplete': {},

560

'diverge': bothdiverge,

560

'diverge': bothdiverge,

561

'incompletediverge': bothincompletediverge

561

'incompletediverge': bothincompletediverge

562

}

562

}

563

for f in bothnew:

563

for f in bothnew:

564

_checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)

564

_checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)

565

_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)

565

_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)

566

if dirtyc1:

566

if dirtyc1:

567

# incomplete copies may only be found on the "dirty" side for bothnew

567

# incomplete copies may only be found on the "dirty" side for bothnew

568

assert not both2['incomplete']

568

assert not both2['incomplete']

569

remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,

569

remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,

570

bothincompletediverge)

570

bothincompletediverge)

571

elif dirtyc2:

571

elif dirtyc2:

572

assert not both1['incomplete']

572

assert not both1['incomplete']

573

remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,

573

remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,

574

bothincompletediverge)

574

bothincompletediverge)

575

else:

575

else:

576

# incomplete copies and divergences can't happen outside grafts

576

# incomplete copies and divergences can't happen outside grafts

577

assert not both1['incomplete']

577

assert not both1['incomplete']

578

assert not both2['incomplete']

578

assert not both2['incomplete']

579

assert not bothincompletediverge

579

assert not bothincompletediverge

580

for f in remainder:

580

for f in remainder:

581

assert f not in bothdiverge

581

assert f not in bothdiverge

582

ic = remainder[f]

582

ic = remainder[f]

583

if ic[0] in (m1 if dirtyc1 else m2):

583

if ic[0] in (m1 if dirtyc1 else m2):

584

# backed-out rename on one side, but watch out for deleted files

584

# backed-out rename on one side, but watch out for deleted files

585

bothdiverge[f] = ic

585

bothdiverge[f] = ic

586

for of, fl in bothdiverge.items():

586

for of, fl in bothdiverge.items():

587

if len(fl) == 2 and fl[0] == fl[1]:

587

if len(fl) == 2 and fl[0] == fl[1]:

588

copy[fl[0]] = of # not actually divergent, just matching renames

588

copy[fl[0]] = of # not actually divergent, just matching renames

589

590

if fullcopy and repo.ui.debugflag:

590

if fullcopy and repo.ui.debugflag:

591

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

591

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

592

"% = renamed and deleted):\n")

592

"% = renamed and deleted):\n")

593

for f in sorted(fullcopy):

593

for f in sorted(fullcopy):

594

note = ""

594

note = ""

595

if f in copy:

595

if f in copy:

596

note += "*"

596

note += "*"

597

if f in divergeset:

597

if f in divergeset:

598

note += "!"

598

note += "!"

599

if f in renamedeleteset:

599

if f in renamedeleteset:

600

note += "%"

600

note += "%"

601

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

601

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

602

note))

602

note))

603

del divergeset

603

del divergeset

604

605

if not fullcopy:

605

if not fullcopy:

606

return copy, {}, diverge, renamedelete, {}

606

return copy, {}, diverge, renamedelete, {}

607

608

repo.ui.debug(" checking for directory renames\n")

608

repo.ui.debug(" checking for directory renames\n")

609

610

# generate a directory move map

610

# generate a directory move map

611

d1, d2 = c1.dirs(), c2.dirs()

611

d1, d2 = c1.dirs(), c2.dirs()

612

# Hack for adding '', which is not otherwise added, to d1 and d2

612

# Hack for adding '', which is not otherwise added, to d1 and d2

613

d1.addpath('/')

613

d1.addpath('/')

614

d2.addpath('/')

614

d2.addpath('/')

615

invalid = set()

615

invalid = set()

616

dirmove = {}

616

dirmove = {}

617

618

# examine each file copy for a potential directory move, which is

618

# examine each file copy for a potential directory move, which is

619

# when all the files in a directory are moved to a new directory

619

# when all the files in a directory are moved to a new directory

620

for dst, src in fullcopy.iteritems():

620

for dst, src in fullcopy.iteritems():

621

dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)

621

dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)

622

if dsrc in invalid:

622

if dsrc in invalid:

623

# already seen to be uninteresting

623

# already seen to be uninteresting

624

continue

624

continue

625

elif dsrc in d1 and ddst in d1:

625

elif dsrc in d1 and ddst in d1:

626

# directory wasn't entirely moved locally

626

# directory wasn't entirely moved locally

627

invalid.add(dsrc)

627

invalid.add(dsrc)

628

elif dsrc in d2 and ddst in d2:

628

elif dsrc in d2 and ddst in d2:

629

# directory wasn't entirely moved remotely

629

# directory wasn't entirely moved remotely

630

invalid.add(dsrc)

630

invalid.add(dsrc)

631

elif dsrc in dirmove and dirmove[dsrc] != ddst:

631

elif dsrc in dirmove and dirmove[dsrc] != ddst:

632

# files from the same directory moved to two different places

632

# files from the same directory moved to two different places

633

invalid.add(dsrc)

633

invalid.add(dsrc)

634

else:

634

else:

635

# looks good so far

635

# looks good so far

636

dirmove[dsrc] = ddst

636

dirmove[dsrc] = ddst

637

638

for i in invalid:

638

for i in invalid:

639

if i in dirmove:

639

if i in dirmove:

640

del dirmove[i]

640

del dirmove[i]

641

del d1, d2, invalid

641

del d1, d2, invalid

642

643

if not dirmove:

643

if not dirmove:

644

return copy, {}, diverge, renamedelete, {}

644

return copy, {}, diverge, renamedelete, {}

645

646

dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

646

dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

647

648

for d in dirmove:

648

for d in dirmove:

649

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

649

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

650

(d, dirmove[d]))

650

(d, dirmove[d]))

651

652

movewithdir = {}

652

movewithdir = {}

653

# check unaccounted nonoverlapping files against directory moves

653

# check unaccounted nonoverlapping files against directory moves

654

for f in u1r + u2r:

654

for f in u1r + u2r:

655

if f not in fullcopy:

655

if f not in fullcopy:

656

for d in dirmove:

656

for d in dirmove:

657

if f.startswith(d):

657

if f.startswith(d):

658

# new file added in a directory that was moved, move it

658

# new file added in a directory that was moved, move it

659

df = dirmove[d] + f[len(d):]

659

df = dirmove[d] + f[len(d):]

660

if df not in copy:

660

if df not in copy:

661

movewithdir[f] = df

661

movewithdir[f] = df

662

repo.ui.debug((" pending file src: '%s' -> "

662

repo.ui.debug((" pending file src: '%s' -> "

663

"dst: '%s'\n") % (f, df))

663

"dst: '%s'\n") % (f, df))

664

break

664

break

665

666

return copy, movewithdir, diverge, renamedelete, dirmove

666

return copy, movewithdir, diverge, renamedelete, dirmove

667

668

def _heuristicscopytracing(repo, c1, c2, base):

668

def _heuristicscopytracing(repo, c1, c2, base):

669

""" Fast copytracing using filename heuristics

669

""" Fast copytracing using filename heuristics

670

671

Assumes that moves or renames are of following two types:

671

Assumes that moves or renames are of following two types:

672

673

1) Inside a directory only (same directory name but different filenames)

673

1) Inside a directory only (same directory name but different filenames)

674

2) Move from one directory to another

674

2) Move from one directory to another

675

(same filenames but different directory names)

675

(same filenames but different directory names)

676

677

Works only when there are no merge commits in the "source branch".

677

Works only when there are no merge commits in the "source branch".

678

Source branch is commits from base up to c2 not including base.

678

Source branch is commits from base up to c2 not including base.

679

680

If merge is involved it fallbacks to _fullcopytracing().

680

If merge is involved it fallbacks to _fullcopytracing().

681

682

Can be used by setting the following config:

682

Can be used by setting the following config:

683

684

[experimental]

684

[experimental]

685

copytrace = heuristics

685

copytrace = heuristics

686

687

In some cases the copy/move candidates found by heuristics can be very large

687

In some cases the copy/move candidates found by heuristics can be very large

688

in number and that will make the algorithm slow. The number of possible

688

in number and that will make the algorithm slow. The number of possible

689

candidates to check can be limited by using the config

689

candidates to check can be limited by using the config

690

`experimental.copytrace.movecandidateslimit` which defaults to 100.

690

`experimental.copytrace.movecandidateslimit` which defaults to 100.

691

"""

691

"""

692

693

if c1.rev() is None:

693

if c1.rev() is None:

694

c1 = c1.p1()

694

c1 = c1.p1()

695

if c2.rev() is None:

695

if c2.rev() is None:

696

c2 = c2.p1()

696

c2 = c2.p1()

697

698

copies = {}

698

copies = {}

699

700

changedfiles = set()

700

changedfiles = set()

701

m1 = c1.manifest()

701

m1 = c1.manifest()

702

if not repo.revs('%d::%d', base.rev(), c2.rev()):

702

if not repo.revs('%d::%d', base.rev(), c2.rev()):

703

# If base is not in c2 branch, we switch to fullcopytracing

703

# If base is not in c2 branch, we switch to fullcopytracing

704

repo.ui.debug("switching to full copytracing as base is not "

704

repo.ui.debug("switching to full copytracing as base is not "

705

"an ancestor of c2\n")

705

"an ancestor of c2\n")

706

return _fullcopytracing(repo, c1, c2, base)

706

return _fullcopytracing(repo, c1, c2, base)

707

708

ctx = c2

708

ctx = c2

709

while ctx != base:

709

while ctx != base:

710

if len(ctx.parents()) == 2:

710

if len(ctx.parents()) == 2:

711

# To keep things simple let's not handle merges

711

# To keep things simple let's not handle merges

712

repo.ui.debug("switching to full copytracing because of merges\n")

712

repo.ui.debug("switching to full copytracing because of merges\n")

713

return _fullcopytracing(repo, c1, c2, base)

713

return _fullcopytracing(repo, c1, c2, base)

714

changedfiles.update(ctx.files())

714

changedfiles.update(ctx.files())

715

ctx = ctx.p1()

715

ctx = ctx.p1()

716

717

cp = _forwardcopies(base, c2)

717

cp = _forwardcopies(base, c2)

718

for dst, src in cp.iteritems():

718

for dst, src in cp.iteritems():

719

if src in m1:

719

if src in m1:

720

copies[dst] = src

720

copies[dst] = src

721

722

# file is missing if it isn't present in the destination, but is present in

722

# file is missing if it isn't present in the destination, but is present in

723

# the base and present in the source.

723

# the base and present in the source.

724

# Presence in the base is important to exclude added files, presence in the

724

# Presence in the base is important to exclude added files, presence in the

725

# source is important to exclude removed files.

725

# source is important to exclude removed files.

726

filt = lambda f: f not in m1 and f in base and f in c2

726

filt = lambda f: f not in m1 and f in base and f in c2

727

missingfiles = [f for f in changedfiles if filt(f)]

727

missingfiles = [f for f in changedfiles if filt(f)]

728

729

if missingfiles:

729

if missingfiles:

730

basenametofilename = collections.defaultdict(list)

730

basenametofilename = collections.defaultdict(list)

731

dirnametofilename = collections.defaultdict(list)

731

dirnametofilename = collections.defaultdict(list)

732

733

for f in m1.filesnotin(base.manifest()):

733

for f in m1.filesnotin(base.manifest()):

734

basename = os.path.basename(f)

734

basename = os.path.basename(f)

735

dirname = os.path.dirname(f)

735

dirname = os.path.dirname(f)

736

basenametofilename[basename].append(f)

736

basenametofilename[basename].append(f)

737

dirnametofilename[dirname].append(f)

737

dirnametofilename[dirname].append(f)

738

739

for f in missingfiles:

739

for f in missingfiles:

740

basename = os.path.basename(f)

740

basename = os.path.basename(f)

741

dirname = os.path.dirname(f)

741

dirname = os.path.dirname(f)

742

samebasename = basenametofilename[basename]

742

samebasename = basenametofilename[basename]

743

samedirname = dirnametofilename[dirname]

743

samedirname = dirnametofilename[dirname]

744

movecandidates = samebasename + samedirname

744

movecandidates = samebasename + samedirname

745

# f is guaranteed to be present in c2, that's why

745

# f is guaranteed to be present in c2, that's why

746

# c2.filectx(f) won't fail

746

# c2.filectx(f) won't fail

747

f2 = c2.filectx(f)

747

f2 = c2.filectx(f)

748

# we can have a lot of candidates which can slow down the heuristics

748

# we can have a lot of candidates which can slow down the heuristics

749

# config value to limit the number of candidates moves to check

749

# config value to limit the number of candidates moves to check

750

maxcandidates = repo.ui.configint('experimental',

750

maxcandidates = repo.ui.configint('experimental',

751

'copytrace.movecandidateslimit')

751

'copytrace.movecandidateslimit')

752

753

if len(movecandidates) > maxcandidates:

753

if len(movecandidates) > maxcandidates:

754

repo.ui.status(_("skipping copytracing for '%s', more "

754

repo.ui.status(_("skipping copytracing for '%s', more "

755

"candidates than the limit: %d\n")

755

"candidates than the limit: %d\n")

756

% (f, len(movecandidates)))

756

% (f, len(movecandidates)))

757

continue

757

continue

758

759

for candidate in movecandidates:

759

for candidate in movecandidates:

760

f1 = c1.filectx(candidate)

760

f1 = c1.filectx(candidate)

761

if _related(f1, f2):

761

if _related(f1, f2):

762

# if there are a few related copies then we'll merge

762

# if there are a few related copies then we'll merge

763

# changes into all of them. This matches the behaviour

763

# changes into all of them. This matches the behaviour

764

# of upstream copytracing

764

# of upstream copytracing

765

copies[candidate] = f

765

copies[candidate] = f

766

767

return copies, {}, {}, {}, {}

767

return copies, {}, {}, {}, {}

768

769

def _related(f1, f2):

769

def _related(f1, f2):

770

"""return True if f1 and f2 filectx have a common ancestor

770

"""return True if f1 and f2 filectx have a common ancestor

771

772

Walk back to common ancestor to see if the two files originate

772

Walk back to common ancestor to see if the two files originate

773

from the same file. Since workingfilectx's rev() is None it messes

773

from the same file. Since workingfilectx's rev() is None it messes

774

up the integer comparison logic, hence the pre-step check for

774

up the integer comparison logic, hence the pre-step check for

775

None (f1 and f2 can only be workingfilectx's initially).

775

None (f1 and f2 can only be workingfilectx's initially).

776

"""

776

"""

777

778

if f1 == f2:

778

if f1 == f2:

779

return f1 # a match

779

return True # a match

780

781

g1, g2 = f1.ancestors(), f2.ancestors()

781

g1, g2 = f1.ancestors(), f2.ancestors()

782

try:

782

try:

783

f1r, f2r = f1.linkrev(), f2.linkrev()

783

f1r, f2r = f1.linkrev(), f2.linkrev()

784

785

if f1r is None:

785

if f1r is None:

786

f1 = next(g1)

786

f1 = next(g1)

787

if f2r is None:

787

if f2r is None:

788

f2 = next(g2)

788

f2 = next(g2)

789

790

while True:

790

while True:

791

f1r, f2r = f1.linkrev(), f2.linkrev()

791

f1r, f2r = f1.linkrev(), f2.linkrev()

792

if f1r > f2r:

792

if f1r > f2r:

793

f1 = next(g1)

793

f1 = next(g1)

794

elif f2r > f1r:

794

elif f2r > f1r:

795

f2 = next(g2)

795

f2 = next(g2)

796

else: # f1 and f2 point to files in the same linkrev

796

else: # f1 and f2 point to files in the same linkrev

797

return f1 == f2 # true if they point to the same file

797

return f1 == f2 # true if they point to the same file

798

except StopIteration:

798

except StopIteration:

799

return False

799

return False

800

801

def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):

801

def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):

802

"""

802

"""

803

check possible copies of f from msrc to mdst

803

check possible copies of f from msrc to mdst

804

805

srcctx = starting context for f in msrc

805

srcctx = starting context for f in msrc

806

dstctx = destination context for f in mdst

806

dstctx = destination context for f in mdst

807

f = the filename to check (as in msrc)

807

f = the filename to check (as in msrc)

808

base = the changectx used as a merge base

808

base = the changectx used as a merge base

809

tca = topological common ancestor for graft-like scenarios

809

tca = topological common ancestor for graft-like scenarios

810

remotebase = True if base is outside tca::srcctx, False otherwise

810

remotebase = True if base is outside tca::srcctx, False otherwise

811

limit = the rev number to not search beyond

811

limit = the rev number to not search beyond

812

data = dictionary of dictionary to store copy data. (see mergecopies)

812

data = dictionary of dictionary to store copy data. (see mergecopies)

813

814

note: limit is only an optimization, and provides no guarantee that

814

note: limit is only an optimization, and provides no guarantee that

815

irrelevant revisions will not be visited

815

irrelevant revisions will not be visited

816

there is no easy way to make this algorithm stop in a guaranteed way

816

there is no easy way to make this algorithm stop in a guaranteed way

817

once it "goes behind a certain revision".

817

once it "goes behind a certain revision".

818

"""

818

"""

819

820

msrc = srcctx.manifest()

820

msrc = srcctx.manifest()

821

mdst = dstctx.manifest()

821

mdst = dstctx.manifest()

822

mb = base.manifest()

822

mb = base.manifest()

823

mta = tca.manifest()

823

mta = tca.manifest()

824

# Might be true if this call is about finding backward renames,

824

# Might be true if this call is about finding backward renames,

825

# This happens in the case of grafts because the DAG is then rotated.

825

# This happens in the case of grafts because the DAG is then rotated.

826

# If the file exists in both the base and the source, we are not looking

826

# If the file exists in both the base and the source, we are not looking

827

# for a rename on the source side, but on the part of the DAG that is

827

# for a rename on the source side, but on the part of the DAG that is

828

# traversed backwards.

828

# traversed backwards.

829

#

829

#

830

# In the case there is both backward and forward renames (before and after

830

# In the case there is both backward and forward renames (before and after

831

# the base) this is more complicated as we must detect a divergence.

831

# the base) this is more complicated as we must detect a divergence.

832

# We use 'backwards = False' in that case.

832

# We use 'backwards = False' in that case.

833

backwards = not remotebase and base != tca and f in mb

833

backwards = not remotebase and base != tca and f in mb

834

getsrcfctx = _makegetfctx(srcctx)

834

getsrcfctx = _makegetfctx(srcctx)

835

getdstfctx = _makegetfctx(dstctx)

835

getdstfctx = _makegetfctx(dstctx)

836

837

if msrc[f] == mb.get(f) and not remotebase:

837

if msrc[f] == mb.get(f) and not remotebase:

838

# Nothing to merge

838

# Nothing to merge

839

return

839

return

840

841

of = None

841

of = None

842

seen = {f}

842

seen = {f}

843

for oc in getsrcfctx(f, msrc[f]).ancestors():

843

for oc in getsrcfctx(f, msrc[f]).ancestors():

844

of = oc.path()

844

of = oc.path()

845

if of in seen:

845

if of in seen:

846

# check limit late - grab last rename before

846

# check limit late - grab last rename before

847

if oc.linkrev() < limit:

847

if oc.linkrev() < limit:

848

break

848

break

849

continue

849

continue

850

seen.add(of)

850

seen.add(of)

851

852

# remember for dir rename detection

852

# remember for dir rename detection

853

if backwards:

853

if backwards:

854

data['fullcopy'][of] = f # grafting backwards through renames

854

data['fullcopy'][of] = f # grafting backwards through renames

855

else:

855

else:

856

data['fullcopy'][f] = of

856

data['fullcopy'][f] = of

857

if of not in mdst:

857

if of not in mdst:

858

continue # no match, keep looking

858

continue # no match, keep looking

859

if mdst[of] == mb.get(of):

859

if mdst[of] == mb.get(of):

860

return # no merge needed, quit early

860

return # no merge needed, quit early

861

c2 = getdstfctx(of, mdst[of])

861

c2 = getdstfctx(of, mdst[of])

862

# c2 might be a plain new file on added on destination side that is

862

# c2 might be a plain new file on added on destination side that is

863

# unrelated to the droids we are looking for.

863

# unrelated to the droids we are looking for.

864

cr = _related(oc, c2)

864

cr = _related(oc, c2)

865

if cr and (of == f or of == c2.path()): # non-divergent

865

if cr and (of == f or of == c2.path()): # non-divergent

866

if backwards:

866

if backwards:

867

data['copy'][of] = f

867

data['copy'][of] = f

868

elif of in mb:

868

elif of in mb:

869

data['copy'][f] = of

869

data['copy'][f] = of

870

elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename

870

elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename

871

data['copy'][of] = f

871

data['copy'][of] = f

872

del data['fullcopy'][f]

872

del data['fullcopy'][f]

873

data['fullcopy'][of] = f

873

data['fullcopy'][of] = f

874

else: # divergence w.r.t. graft CA on one side of topological CA

874

else: # divergence w.r.t. graft CA on one side of topological CA

875

for sf in seen:

875

for sf in seen:

876

if sf in mb:

876

if sf in mb:

877

assert sf not in data['diverge']

877

assert sf not in data['diverge']

878

data['diverge'][sf] = [f, of]

878

data['diverge'][sf] = [f, of]

879

break

879

break

880

return

880

return

881

882

if of in mta:

882

if of in mta:

883

if backwards or remotebase:

883

if backwards or remotebase:

884

data['incomplete'][of] = f

884

data['incomplete'][of] = f

885

else:

885

else:

886

for sf in seen:

886

for sf in seen:

887

if sf in mb:

887

if sf in mb:

888

if tca == base:

888

if tca == base:

889

data['diverge'].setdefault(sf, []).append(f)

889

data['diverge'].setdefault(sf, []).append(f)

890

else:

890

else:

891

data['incompletediverge'][sf] = [of, f]

891

data['incompletediverge'][sf] = [of, f]

892

return

892

return

893

894

def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):

894

def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):

895

"""reproduce copies from fromrev to rev in the dirstate

895

"""reproduce copies from fromrev to rev in the dirstate

896

897

If skiprev is specified, it's a revision that should be used to

897

If skiprev is specified, it's a revision that should be used to

898

filter copy records. Any copies that occur between fromrev and

898

filter copy records. Any copies that occur between fromrev and

899

skiprev will not be duplicated, even if they appear in the set of

899

skiprev will not be duplicated, even if they appear in the set of

900

copies between fromrev and rev.

900

copies between fromrev and rev.

901

"""

901

"""

902

exclude = {}

902

exclude = {}

903

ctraceconfig = repo.ui.config('experimental', 'copytrace')

903

ctraceconfig = repo.ui.config('experimental', 'copytrace')

904

bctrace = stringutil.parsebool(ctraceconfig)

904

bctrace = stringutil.parsebool(ctraceconfig)

905

if (skiprev is not None and

905

if (skiprev is not None and

906

(ctraceconfig == 'heuristics' or bctrace or bctrace is None)):

906

(ctraceconfig == 'heuristics' or bctrace or bctrace is None)):

907

# copytrace='off' skips this line, but not the entire function because

907

# copytrace='off' skips this line, but not the entire function because

908

# the line below is O(size of the repo) during a rebase, while the rest

908

# the line below is O(size of the repo) during a rebase, while the rest

909

# of the function is much faster (and is required for carrying copy

909

# of the function is much faster (and is required for carrying copy

910

# metadata across the rebase anyway).

910

# metadata across the rebase anyway).

911

exclude = pathcopies(repo[fromrev], repo[skiprev])

911

exclude = pathcopies(repo[fromrev], repo[skiprev])

912

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

912

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

913

# copies.pathcopies returns backward renames, so dst might not

913

# copies.pathcopies returns backward renames, so dst might not

914

# actually be in the dirstate

914

# actually be in the dirstate

915

if dst in exclude:

915

if dst in exclude:

916

continue

916

continue

917

wctx[dst].markcopied(src)

917

wctx[dst].markcopied(src)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import collections
             import heapq
             import os
             from .i18n import _
             from . import (
                 match as matchmod,
                 node,
                 pathutil,
                 scmutil,
                 util,
             )
             from .utils import (
                 stringutil,
             )
             def _findlimit(repo, ctxa, ctxb):
                 """
                 Find the last revision that needs to be checked to ensure that a full
                 transitive closure for file copies can be properly calculated.
                 Generally, this means finding the earliest revision number that's an
                 ancestor of a or b but not both, except when a or b is a direct descendent
                 of the other, in which case we can return the minimum revnum of a and b.
                 """
                 # basic idea:
                 # - mark a and b with different sides
                 # - if a parent's children are all on the same side, the parent is
                 #   on that side, otherwise it is on no side
                 # - walk the graph in topological order with the help of a heap;
                 #   - add unseen parents to side map
                 #   - clear side of any parent that has children on different sides
                 #   - track number of interesting revs that might still be on a side
                 #   - track the lowest interesting rev seen
                 #   - quit when interesting revs is zero
                 cl = repo.changelog
                 wdirparents = None
                 a = ctxa.rev()
                 b = ctxb.rev()
                 if a is None:
                     wdirparents = (ctxa.p1(), ctxa.p2())
                     a = node.wdirrev
                 if b is None:
                     assert not wdirparents
                     wdirparents = (ctxb.p1(), ctxb.p2())
                     b = node.wdirrev
                 side = {a: -1, b: 1}
                 visit = [-a, -b]
                 heapq.heapify(visit)
                 interesting = len(visit)
                 limit = node.wdirrev
                 while interesting:
                     r = -heapq.heappop(visit)
                     if r == node.wdirrev:
                         parents = [pctx.rev() for pctx in wdirparents]
                     else:
                         parents = cl.parentrevs(r)
                     if parents[1] == node.nullrev:
                         parents = parents[:1]
                     for p in parents:
                         if p not in side:
                             # first time we see p; add it to visit
                             side[p] = side[r]
                             if side[p]:
                                 interesting += 1
                             heapq.heappush(visit, -p)
                         elif side[p] and side[p] != side[r]:
                             # p was interesting but now we know better
                             side[p] = 0
                             interesting -= 1
                     if side[r]:
                         limit = r # lowest rev visited
                         interesting -= 1
                 # Consider the following flow (see test-commit-amend.t under issue4405):
                 # 1/ File 'a0' committed
                 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
                 # 3/ Move back to first commit
                 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
                 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
                 #
                 # During the amend in step five, we will be in this state:
                 #
                 # @  3 temporary amend commit for a1-amend
                 # |
                 # o  2 a1-amend
                 # |
                 # | o  1 a1
                 # |/
                 # o  0 a0
                 #
                 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
                 # yet the filelog has the copy information in rev 1 and we will not look
                 # back far enough unless we also look at the a and b as candidates.
                 # This only occurs when a is a descendent of b or visa-versa.
                 return min(limit, a, b)
             def _chain(src, dst, a, b):
                 """chain two sets of copies a->b"""
                 t = a.copy()
                 for k, v in b.iteritems():
                     if v in t:
                         # found a chain
                         if t[v] != k:
                             # file wasn't renamed back to itself
                             t[k] = t[v]
                         if v not in dst:
                             # chain was a rename, not a copy
                             del t[v]
                     if v in src:
                         # file is a copy of an existing file
                         t[k] = v
                 # remove criss-crossed copies
                 for k, v in list(t.items()):
                     if k in src and v in dst:
                         del t[k]
                 return t
             def _tracefile(fctx, am, limit=node.nullrev):
                 """return file context that is the ancestor of fctx present in ancestor
                 manifest am, stopping after the first ancestor lower than limit"""
                 for f in fctx.ancestors():
                     if am.get(f.path(), None) == f.filenode():
                         return f
                     if limit >= 0 and not f.isintroducedafter(limit):
                         return None
             def _dirstatecopies(d, match=None):
                 ds = d._repo.dirstate
                 c = ds.copies().copy()
                 for k in list(c):
                     if ds[k] not in 'anm' or (match and not match(k)):
                         del c[k]
                 return c
             def _computeforwardmissing(a, b, match=None):
                 """Computes which files are in b but not a.
                 This is its own function so extensions can easily wrap this call to see what
                 files _forwardcopies is about to process.
                 """
                 ma = a.manifest()
                 mb = b.manifest()
                 return mb.filesnotin(ma, match=match)
             def _committedforwardcopies(a, b, match):
                 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 repo = a._repo
                 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
                 dbg = repo.ui.debug
                 if debug:
                     dbg('debug.copies:    looking into rename from %s to %s\n'
                         % (a, b))
                 limit = _findlimit(repo, a, b)
                 if debug:
                     dbg('debug.copies:      search limit: %d\n' % limit)
                 am = a.manifest()
                 # find where new files came from
                 # we currently don't try to find where old files went, too expensive
                 # this means we can miss a case like 'hg rm b; hg cp a b'
                 cm = {}
                 # Computing the forward missing is quite expensive on large manifests, since
                 # it compares the entire manifests. We can optimize it in the common use
                 # case of computing what copies are in a commit versus its parent (like
                 # during a rebase or histedit). Note, we exclude merge commits from this
                 # optimization, since the ctx.files() for a merge commit is not correct for
                 # this comparison.
                 forwardmissingmatch = match
                 if b.p1() == a and b.p2().node() == node.nullid:
                     filesmatcher = scmutil.matchfiles(a._repo, b.files())
                     forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
                 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
                 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
                 if debug:
                     dbg('debug.copies:      missing file to search: %d\n' % len(missing))
                 for f in missing:
                     if debug:
                         dbg('debug.copies:        tracing file: %s\n' % f)
                     fctx = b[f]
                     fctx._ancestrycontext = ancestrycontext
                     if debug:
                         start = util.timer()
                     ofctx = _tracefile(fctx, am, limit)
                     if ofctx:
                         if debug:
                             dbg('debug.copies:          rename of: %s\n' % ofctx._path)
                         cm[f] = ofctx.path()
                     if debug:
                         dbg('debug.copies:          time: %f seconds\n'
                             % (util.timer() - start))
                 return cm
             def _forwardcopies(a, b, match=None):
                 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
                 match = a.repo().narrowmatch(match)
                 # check for working copy
                 if b.rev() is None:
                     if a == b.p1():
                         # short-circuit to avoid issues with merge states
                         return _dirstatecopies(b, match)
                     cm = _committedforwardcopies(a, b.p1(), match)
                     # combine copies from dirstate if necessary
                     return _chain(a, b, cm, _dirstatecopies(b, match))
                 return _committedforwardcopies(a, b, match)
             def _backwardrenames(a, b):
                 if a._repo.ui.config('experimental', 'copytrace') == 'off':
                     return {}
                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
                 f = _forwardcopies(b, a)
                 r = {}
                 for k, v in sorted(f.iteritems()):
                     # remove copies
                     if v in a:
                         continue
                     r[v] = k
                 return r
             def pathcopies(x, y, match=None):
                 """find {dst@y: src@x} copy mapping for directed compare"""
                 repo = x._repo
                 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
                 if debug:
                     repo.ui.debug('debug.copies: searching copies from %s to %s\n'
                                   % (x, y))
                 if x == y or not x or not y:
                     return {}
                 a = y.ancestor(x)
                 if a == x:
                     if debug:
                         repo.ui.debug('debug.copies: search mode: forward\n')
                     return _forwardcopies(x, y, match=match)
                 if a == y:
                     if debug:
                         repo.ui.debug('debug.copies: search mode: backward\n')
                     return _backwardrenames(x, y)
                 if debug:
                     repo.ui.debug('debug.copies: search mode: combined\n')
                 return _chain(x, y, _backwardrenames(x, a),
                               _forwardcopies(a, y, match=match))
             def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
                 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
                 and c2. This is its own function so extensions can easily wrap this call
                 to see what files mergecopies is about to process.
                 Even though c1 and c2 are not used in this function, they are useful in
                 other extensions for being able to read the file nodes of the changed files.
                 "baselabel" can be passed to help distinguish the multiple computations
                 done in the graft case.
                 """
                 u1 = sorted(addedinm1 - addedinm2)
                 u2 = sorted(addedinm2 - addedinm1)
                 header = "  unmatched files in %s"
                 if baselabel:
                     header += ' (from %s)' % baselabel
                 if u1:
                     repo.ui.debug("%s:\n   %s\n" % (header % 'local', "\n   ".join(u1)))
                 if u2:
                     repo.ui.debug("%s:\n   %s\n" % (header % 'other', "\n   ".join(u2)))
                 return u1, u2
             def _makegetfctx(ctx):
                 """return a 'getfctx' function suitable for _checkcopies usage
                 We have to re-setup the function building 'filectx' for each
                 '_checkcopies' to ensure the linkrev adjustment is properly setup for
                 each. Linkrev adjustment is important to avoid bug in rename
                 detection. Moreover, having a proper '_ancestrycontext' setup ensures
                 the performance impact of this adjustment is kept limited. Without it,
                 each file could do a full dag traversal making the time complexity of
                 the operation explode (see issue4537).
                 This function exists here mostly to limit the impact on stable. Feel
                 free to refactor on default.
                 """
                 rev = ctx.rev()
                 repo = ctx._repo
                 ac = getattr(ctx, '_ancestrycontext', None)
                 if ac is None:
                     revs = [rev]
                     if rev is None:
                         revs = [p.rev() for p in ctx.parents()]
                     ac = repo.changelog.ancestors(revs, inclusive=True)
                     ctx._ancestrycontext = ac
                 def makectx(f, n):
                     if n in node.wdirfilenodeids:  # in a working context?
                         if ctx.rev() is None:
                             return ctx.filectx(f)
                         return repo[None][f]
                     fctx = repo.filectx(f, fileid=n)
                     # setup only needed for filectx not create from a changectx
                     fctx._ancestrycontext = ac
                     fctx._descendantrev = rev
                     return fctx
                 return util.lrucachefunc(makectx)
             def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
                 """combine partial copy paths"""
                 remainder = {}
                 for f in copyfrom:
                     if f in copyto:
                         finalcopy[copyto[f]] = copyfrom[f]
                         del copyto[f]
                 for f in incompletediverge:
                     assert f not in diverge
                     ic = incompletediverge[f]
                     if ic[0] in copyto:
                         diverge[f] = [copyto[ic[0]], ic[1]]
                     else:
                         remainder[f] = ic
                 return remainder
             def mergecopies(repo, c1, c2, base):
                 """
                 The function calling different copytracing algorithms on the basis of config
                 which find moves and copies between context c1 and c2 that are relevant for
                 merging. 'base' will be used as the merge base.
                 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
                 files that were moved/ copied in one merge parent and modified in another.
                 For example:
                 o          ---> 4 another commit
                 |
                 |   o      ---> 3 commit that modifies a.txt
                 |  /
                 o /        ---> 2 commit that moves a.txt to b.txt
                 |/
                 o          ---> 1 merge base
                 If we try to rebase revision 3 on revision 4, since there is no a.txt in
                 revision 4, and if user have copytrace disabled, we prints the following
                 message:
                 ```other changed <file> which local deleted```
                 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
                 "dirmove".
                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.
                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.
                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.
                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.
                 "dirmove" is a mapping of detected source dir -> destination dir renames.
                 This is needed for handling changes to new files previously grafted into
                 renamed directories.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return {}, {}, {}, {}, {}
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return repo.dirstate.copies(), {}, {}, {}, {}
                 copytracing = repo.ui.config('experimental', 'copytrace')
                 boolctrace = stringutil.parsebool(copytracing)
                 # Copy trace disabling is explicitly below the node == p1 logic above
                 # because the logic above is required for a simple copy to be kept across a
                 # rebase.
                 if copytracing == 'heuristics':
                     # Do full copytracing if only non-public revisions are involved as
                     # that will be fast enough and will also cover the copies which could
                     # be missed by heuristics
                     if _isfullcopytraceable(repo, c1, base):
                         return _fullcopytracing(repo, c1, c2, base)
                     return _heuristicscopytracing(repo, c1, c2, base)
                 elif boolctrace is False:
                     # stringutil.parsebool() returns None when it is unable to parse the
                     # value, so we should rely on making sure copytracing is on such cases
                     return {}, {}, {}, {}, {}
                 else:
                     return _fullcopytracing(repo, c1, c2, base)
             def _isfullcopytraceable(repo, c1, base):
                 """ Checks that if base, source and destination are all no-public branches,
                 if yes let's use the full copytrace algorithm for increased capabilities
                 since it will be fast enough.
                 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
                 number of changesets from c1 to base such that if number of changesets are
                 more than the limit, full copytracing algorithm won't be used.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c1.mutable() and base.mutable():
                     sourcecommitlimit = repo.ui.configint('experimental',
                                                           'copytrace.sourcecommitlimit')
                     commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
                     return commits < sourcecommitlimit
                 return False
             def _fullcopytracing(repo, c1, c2, base):
                 """ The full copytracing algorithm which finds all the new files that were
                 added from merge base up to the top commit and for each file it checks if
                 this file was copied from another file.
                 This is pretty slow when a lot of changesets are involved but will track all
                 the copies.
                 """
                 # In certain scenarios (e.g. graft, update or rebase), base can be
                 # overridden We still need to know a real common ancestor in this case We
                 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
                 # can be multiple common ancestors, e.g. in case of bidmerge.  Because our
                 # caller may not know if the revision passed in lieu of the CA is a genuine
                 # common ancestor or not without explicitly checking it, it's better to
                 # determine that here.
                 #
                 # base.isancestorof(wc) is False, work around that
                 _c1 = c1.p1() if c1.rev() is None else c1
                 _c2 = c2.p1() if c2.rev() is None else c2
                 # an endpoint is "dirty" if it isn't a descendant of the merge base
                 # if we have a dirty endpoint, we need to trigger graft logic, and also
                 # keep track of which endpoint is dirty
                 dirtyc1 = not base.isancestorof(_c1)
                 dirtyc2 = not base.isancestorof(_c2)
                 graft = dirtyc1 or dirtyc2
                 tca = base
                 if graft:
                     tca = _c1.ancestor(_c2)
                 limit = _findlimit(repo, c1, c2)
                 repo.ui.debug("  searching for copies back to rev %d\n" % limit)
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 mb = base.manifest()
                 # gather data from _checkcopies:
                 # - diverge = record all diverges in this dict
                 # - copy = record all non-divergent copies in this dict
                 # - fullcopy = record all copies in this dict
                 # - incomplete = record non-divergent partial copies here
                 # - incompletediverge = record divergent partial copies here
                 diverge = {} # divergence data is shared
                 incompletediverge  = {}
                 data1 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': diverge,
                          'incompletediverge': incompletediverge,
                         }
                 data2 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': diverge,
                          'incompletediverge': incompletediverge,
                         }
                 # find interesting file sets from manifests
                 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
                 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
                 bothnew = sorted(addedinm1 & addedinm2)
                 if tca == base:
                     # unmatched file from base
                     u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
                     u1u, u2u = u1r, u2r
                 else:
                     # unmatched file from base (DAG rotation in the graft case)
                     u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                                   baselabel='base')
                     # unmatched file from topological common ancestors (no DAG rotation)
                     # need to recompute this for directory move handling when grafting
                     mta = tca.manifest()
                     u1u, u2u = _computenonoverlap(repo, c1, c2,
                                                   m1.filesnotin(mta, repo.narrowmatch()),
                                                   m2.filesnotin(mta, repo.narrowmatch()),
                                                   baselabel='topological common ancestor')
                 for f in u1u:
                     _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
                 for f in u2u:
                     _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
                 copy = dict(data1['copy'])
                 copy.update(data2['copy'])
                 fullcopy = dict(data1['fullcopy'])
                 fullcopy.update(data2['fullcopy'])
                 if dirtyc1:
                     _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                                    incompletediverge)
                 else:
                     _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                                    incompletediverge)
                 renamedelete = {}
                 renamedeleteset = set()
                 divergeset = set()
                 for of, fl in list(diverge.items()):
                     if len(fl) == 1 or of in c1 or of in c2:
                         del diverge[of] # not actually divergent, or not a rename
                         if of not in c1 and of not in c2:
                             # renamed on one side, deleted on the other side, but filter
                             # out files that have been renamed and then deleted
                             renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                             renamedeleteset.update(fl) # reverse map for below
                     else:
                         divergeset.update(fl) # reverse map for below
                 if bothnew:
                     repo.ui.debug("  unmatched files new in both:\n   %s\n"
                                   % "\n   ".join(bothnew))
                 bothdiverge = {}
                 bothincompletediverge = {}
                 remainder = {}
                 both1 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': bothdiverge,
                          'incompletediverge': bothincompletediverge
                         }
                 both2 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': bothdiverge,
                          'incompletediverge': bothincompletediverge
                         }
                 for f in bothnew:
                     _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
                     _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
                 if dirtyc1:
                     # incomplete copies may only be found on the "dirty" side for bothnew
                     assert not both2['incomplete']
                     remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                                bothincompletediverge)
                 elif dirtyc2:
                     assert not both1['incomplete']
                     remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                                bothincompletediverge)
                 else:
                     # incomplete copies and divergences can't happen outside grafts
                     assert not both1['incomplete']
                     assert not both2['incomplete']
                     assert not bothincompletediverge
                 for f in remainder:
                     assert f not in bothdiverge
                     ic = remainder[f]
                     if ic[0] in (m1 if dirtyc1 else m2):
                         # backed-out rename on one side, but watch out for deleted files
                         bothdiverge[f] = ic
                 for of, fl in bothdiverge.items():
                     if len(fl) == 2 and fl[0] == fl[1]:
                         copy[fl[0]] = of # not actually divergent, just matching renames
                 if fullcopy and repo.ui.debugflag:
                     repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                                   "% = renamed and deleted):\n")
                     for f in sorted(fullcopy):
                         note = ""
                         if f in copy:
                             note += "*"
                         if f in divergeset:
                             note += "!"
                         if f in renamedeleteset:
                             note += "%"
                         repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                                           note))
                 del divergeset
                 if not fullcopy:
                     return copy, {}, diverge, renamedelete, {}
                 repo.ui.debug("  checking for directory renames\n")
                 # generate a directory move map
                 d1, d2 = c1.dirs(), c2.dirs()
                 # Hack for adding '', which is not otherwise added, to d1 and d2
                 d1.addpath('/')
                 d2.addpath('/')
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in fullcopy.iteritems():
                     dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif dsrc in d1 and ddst in d1:
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc)
                     elif dsrc in d2 and ddst in d2:
                         # directory wasn't entirely moved remotely
                         invalid.add(dsrc)
                     elif dsrc in dirmove and dirmove[dsrc] != ddst:
                         # files from the same directory moved to two different places
                         invalid.add(dsrc)
                     else:
                         # looks good so far
                         dirmove[dsrc] = ddst
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del d1, d2, invalid
                 if not dirmove:
                     return copy, {}, diverge, renamedelete, {}
                 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
                 for d in dirmove:
                     repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                                   (d, dirmove[d]))
                 movewithdir = {}
                 # check unaccounted nonoverlapping files against directory moves
                 for f in u1r + u2r:
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d):]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(("   pending file src: '%s' -> "
                                                    "dst: '%s'\n") % (f, df))
                                 break
                 return copy, movewithdir, diverge, renamedelete, dirmove
             def _heuristicscopytracing(repo, c1, c2, base):
                 """ Fast copytracing using filename heuristics
                 Assumes that moves or renames are of following two types:
 ) Inside a directory only (same directory name but different filenames)
 ) Move from one directory to another
                                 (same filenames but different directory names)
                 Works only when there are no merge commits in the "source branch".
                 Source branch is commits from base up to c2 not including base.
                 If merge is involved it fallbacks to _fullcopytracing().
                 Can be used by setting the following config:
                     [experimental]
                     copytrace = heuristics
                 In some cases the copy/move candidates found by heuristics can be very large
                 in number and that will make the algorithm slow. The number of possible
                 candidates to check can be limited by using the config
                 `experimental.copytrace.movecandidateslimit` which defaults to 100.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c2.rev() is None:
                     c2 = c2.p1()
                 copies = {}
                 changedfiles = set()
                 m1 = c1.manifest()
                 if not repo.revs('%d::%d', base.rev(), c2.rev()):
                     # If base is not in c2 branch, we switch to fullcopytracing
                     repo.ui.debug("switching to full copytracing as base is not "
                                   "an ancestor of c2\n")
                     return _fullcopytracing(repo, c1, c2, base)
                 ctx = c2
                 while ctx != base:
                     if len(ctx.parents()) == 2:
                         # To keep things simple let's not handle merges
                         repo.ui.debug("switching to full copytracing because of merges\n")
                         return _fullcopytracing(repo, c1, c2, base)
                     changedfiles.update(ctx.files())
                     ctx = ctx.p1()
                 cp = _forwardcopies(base, c2)
                 for dst, src in cp.iteritems():
                     if src in m1:
                         copies[dst] = src
                 # file is missing if it isn't present in the destination, but is present in
                 # the base and present in the source.
                 # Presence in the base is important to exclude added files, presence in the
                 # source is important to exclude removed files.
                 filt = lambda f: f not in m1 and f in base and f in c2
                 missingfiles = [f for f in changedfiles if filt(f)]
                 if missingfiles:
                     basenametofilename = collections.defaultdict(list)
                     dirnametofilename = collections.defaultdict(list)
                     for f in m1.filesnotin(base.manifest()):
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         basenametofilename[basename].append(f)
                         dirnametofilename[dirname].append(f)
                     for f in missingfiles:
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         samebasename = basenametofilename[basename]
                         samedirname = dirnametofilename[dirname]
                         movecandidates = samebasename + samedirname
                         # f is guaranteed to be present in c2, that's why
                         # c2.filectx(f) won't fail
                         f2 = c2.filectx(f)
                         # we can have a lot of candidates which can slow down the heuristics
                         # config value to limit the number of candidates moves to check
                         maxcandidates = repo.ui.configint('experimental',
                                                           'copytrace.movecandidateslimit')
                         if len(movecandidates) > maxcandidates:
                             repo.ui.status(_("skipping copytracing for '%s', more "
                                              "candidates than the limit: %d\n")
                                            % (f, len(movecandidates)))
                             continue
                         for candidate in movecandidates:
                             f1 = c1.filectx(candidate)
                             if _related(f1, f2):
                                 # if there are a few related copies then we'll merge
                                 # changes into all of them. This matches the behaviour
                                 # of upstream copytracing
                                 copies[candidate] = f
                 return copies, {}, {}, {}, {}
             def _related(f1, f2):
                 """return True if f1 and f2 filectx have a common ancestor
                 Walk back to common ancestor to see if the two files originate
                 from the same file. Since workingfilectx's rev() is None it messes
                 up the integer comparison logic, hence the pre-step check for
                 None (f1 and f2 can only be workingfilectx's initially).
                 """
                 if f1 == f2:
-                    return f1 # a match
+                    return True # a match
                 g1, g2 = f1.ancestors(), f2.ancestors()
                 try:
                     f1r, f2r = f1.linkrev(), f2.linkrev()
                     if f1r is None:
                         f1 = next(g1)
                     if f2r is None:
                         f2 = next(g2)
                     while True:
                         f1r, f2r = f1.linkrev(), f2.linkrev()
                         if f1r > f2r:
                             f1 = next(g1)
                         elif f2r > f1r:
                             f2 = next(g2)
                         else: # f1 and f2 point to files in the same linkrev
                             return f1 == f2 # true if they point to the same file
                 except StopIteration:
                     return False
             def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
                 """
                 check possible copies of f from msrc to mdst
                 srcctx = starting context for f in msrc
                 dstctx = destination context for f in mdst
                 f = the filename to check (as in msrc)
                 base = the changectx used as a merge base
                 tca = topological common ancestor for graft-like scenarios
                 remotebase = True if base is outside tca::srcctx, False otherwise
                 limit = the rev number to not search beyond
                 data = dictionary of dictionary to store copy data. (see mergecopies)
                 note: limit is only an optimization, and provides no guarantee that
                 irrelevant revisions will not be visited
                 there is no easy way to make this algorithm stop in a guaranteed way
                 once it "goes behind a certain revision".
                 """
                 msrc = srcctx.manifest()
                 mdst = dstctx.manifest()
                 mb = base.manifest()
                 mta = tca.manifest()
                 # Might be true if this call is about finding backward renames,
                 # This happens in the case of grafts because the DAG is then rotated.
                 # If the file exists in both the base and the source, we are not looking
                 # for a rename on the source side, but on the part of the DAG that is
                 # traversed backwards.
                 #
                 # In the case there is both backward and forward renames (before and after
                 # the base) this is more complicated as we must detect a divergence.
                 # We use 'backwards = False' in that case.
                 backwards = not remotebase and base != tca and f in mb
                 getsrcfctx = _makegetfctx(srcctx)
                 getdstfctx = _makegetfctx(dstctx)
                 if msrc[f] == mb.get(f) and not remotebase:
                     # Nothing to merge
                     return
                 of = None
                 seen = {f}
                 for oc in getsrcfctx(f, msrc[f]).ancestors():
                     of = oc.path()
                     if of in seen:
                         # check limit late - grab last rename before
                         if oc.linkrev() < limit:
                             break
                         continue
                     seen.add(of)
                     # remember for dir rename detection
                     if backwards:
                         data['fullcopy'][of] = f # grafting backwards through renames
                     else:
                         data['fullcopy'][f] = of
                     if of not in mdst:
                         continue # no match, keep looking
                     if mdst[of] == mb.get(of):
                         return # no merge needed, quit early
                     c2 = getdstfctx(of, mdst[of])
                     # c2 might be a plain new file on added on destination side that is
                     # unrelated to the droids we are looking for.
                     cr = _related(oc, c2)
                     if cr and (of == f or of == c2.path()): # non-divergent
                         if backwards:
                             data['copy'][of] = f
                         elif of in mb:
                             data['copy'][f] = of
                         elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                             data['copy'][of] = f
                             del data['fullcopy'][f]
                             data['fullcopy'][of] = f
                         else: # divergence w.r.t. graft CA on one side of topological CA
                             for sf in seen:
                                 if sf in mb:
                                     assert sf not in data['diverge']
                                     data['diverge'][sf] = [f, of]
                                     break
                         return
                 if of in mta:
                     if backwards or remotebase:
                         data['incomplete'][of] = f
                     else:
                         for sf in seen:
                             if sf in mb:
                                 if tca == base:
                                     data['diverge'].setdefault(sf, []).append(f)
                                 else:
                                     data['incompletediverge'][sf] = [of, f]
                                 return
             def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
                 """reproduce copies from fromrev to rev in the dirstate
                 If skiprev is specified, it's a revision that should be used to
                 filter copy records. Any copies that occur between fromrev and
                 skiprev will not be duplicated, even if they appear in the set of
                 copies between fromrev and rev.
                 """
                 exclude = {}
                 ctraceconfig = repo.ui.config('experimental', 'copytrace')
                 bctrace = stringutil.parsebool(ctraceconfig)
                 if (skiprev is not None and
                     (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
                     # copytrace='off' skips this line, but not the entire function because
                     # the line below is O(size of the repo) during a rebase, while the rest
                     # of the function is much faster (and is required for carrying copy
                     # metadata across the rebase anyway).
                     exclude = pathcopies(repo[fromrev], repo[skiprev])
                 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
                     # copies.pathcopies returns backward renames, so dst might not
                     # actually be in the dirstate
                     if dst in exclude:
                         continue
                     wctx[dst].markcopied(src)