##// END OF EJS Templates
copies: use node.wdirrev instead of inventing another constant for it...
Martin von Zweigbergk -
r41267:e3e1b063 default
parent child Browse files
Show More
@@ -1,923 +1,922 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 scmutil,
20 scmutil,
21 util,
21 util,
22 )
22 )
23 from .utils import (
23 from .utils import (
24 stringutil,
24 stringutil,
25 )
25 )
26
26
27 def _findlimit(repo, a, b):
27 def _findlimit(repo, a, b):
28 """
28 """
29 Find the last revision that needs to be checked to ensure that a full
29 Find the last revision that needs to be checked to ensure that a full
30 transitive closure for file copies can be properly calculated.
30 transitive closure for file copies can be properly calculated.
31 Generally, this means finding the earliest revision number that's an
31 Generally, this means finding the earliest revision number that's an
32 ancestor of a or b but not both, except when a or b is a direct descendent
32 ancestor of a or b but not both, except when a or b is a direct descendent
33 of the other, in which case we can return the minimum revnum of a and b.
33 of the other, in which case we can return the minimum revnum of a and b.
34 None if no such revision exists.
34 None if no such revision exists.
35 """
35 """
36
36
37 # basic idea:
37 # basic idea:
38 # - mark a and b with different sides
38 # - mark a and b with different sides
39 # - if a parent's children are all on the same side, the parent is
39 # - if a parent's children are all on the same side, the parent is
40 # on that side, otherwise it is on no side
40 # on that side, otherwise it is on no side
41 # - walk the graph in topological order with the help of a heap;
41 # - walk the graph in topological order with the help of a heap;
42 # - add unseen parents to side map
42 # - add unseen parents to side map
43 # - clear side of any parent that has children on different sides
43 # - clear side of any parent that has children on different sides
44 # - track number of interesting revs that might still be on a side
44 # - track number of interesting revs that might still be on a side
45 # - track the lowest interesting rev seen
45 # - track the lowest interesting rev seen
46 # - quit when interesting revs is zero
46 # - quit when interesting revs is zero
47
47
48 cl = repo.changelog
48 cl = repo.changelog
49 working = len(cl) # pseudo rev for the working directory
50 if a is None:
49 if a is None:
51 a = working
50 a = node.wdirrev
52 if b is None:
51 if b is None:
53 b = working
52 b = node.wdirrev
54
53
55 side = {a: -1, b: 1}
54 side = {a: -1, b: 1}
56 visit = [-a, -b]
55 visit = [-a, -b]
57 heapq.heapify(visit)
56 heapq.heapify(visit)
58 interesting = len(visit)
57 interesting = len(visit)
59 hascommonancestor = False
58 hascommonancestor = False
60 limit = working
59 limit = node.wdirrev
61
60
62 while interesting:
61 while interesting:
63 r = -heapq.heappop(visit)
62 r = -heapq.heappop(visit)
64 if r == working:
63 if r == node.wdirrev:
65 parents = [cl.rev(p) for p in repo.dirstate.parents()]
64 parents = [cl.rev(p) for p in repo.dirstate.parents()]
66 else:
65 else:
67 parents = cl.parentrevs(r)
66 parents = cl.parentrevs(r)
68 for p in parents:
67 for p in parents:
69 if p < 0:
68 if p < 0:
70 continue
69 continue
71 if p not in side:
70 if p not in side:
72 # first time we see p; add it to visit
71 # first time we see p; add it to visit
73 side[p] = side[r]
72 side[p] = side[r]
74 if side[p]:
73 if side[p]:
75 interesting += 1
74 interesting += 1
76 heapq.heappush(visit, -p)
75 heapq.heappush(visit, -p)
77 elif side[p] and side[p] != side[r]:
76 elif side[p] and side[p] != side[r]:
78 # p was interesting but now we know better
77 # p was interesting but now we know better
79 side[p] = 0
78 side[p] = 0
80 interesting -= 1
79 interesting -= 1
81 hascommonancestor = True
80 hascommonancestor = True
82 if side[r]:
81 if side[r]:
83 limit = r # lowest rev visited
82 limit = r # lowest rev visited
84 interesting -= 1
83 interesting -= 1
85
84
86 if not hascommonancestor:
85 if not hascommonancestor:
87 return None
86 return None
88
87
89 # Consider the following flow (see test-commit-amend.t under issue4405):
88 # Consider the following flow (see test-commit-amend.t under issue4405):
90 # 1/ File 'a0' committed
89 # 1/ File 'a0' committed
91 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
90 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
92 # 3/ Move back to first commit
91 # 3/ Move back to first commit
93 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
92 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
94 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
93 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
95 #
94 #
96 # During the amend in step five, we will be in this state:
95 # During the amend in step five, we will be in this state:
97 #
96 #
98 # @ 3 temporary amend commit for a1-amend
97 # @ 3 temporary amend commit for a1-amend
99 # |
98 # |
100 # o 2 a1-amend
99 # o 2 a1-amend
101 # |
100 # |
102 # | o 1 a1
101 # | o 1 a1
103 # |/
102 # |/
104 # o 0 a0
103 # o 0 a0
105 #
104 #
106 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
105 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
107 # yet the filelog has the copy information in rev 1 and we will not look
106 # yet the filelog has the copy information in rev 1 and we will not look
108 # back far enough unless we also look at the a and b as candidates.
107 # back far enough unless we also look at the a and b as candidates.
109 # This only occurs when a is a descendent of b or visa-versa.
108 # This only occurs when a is a descendent of b or visa-versa.
110 return min(limit, a, b)
109 return min(limit, a, b)
111
110
112 def _chain(src, dst, a, b):
111 def _chain(src, dst, a, b):
113 """chain two sets of copies a->b"""
112 """chain two sets of copies a->b"""
114 t = a.copy()
113 t = a.copy()
115 for k, v in b.iteritems():
114 for k, v in b.iteritems():
116 if v in t:
115 if v in t:
117 # found a chain
116 # found a chain
118 if t[v] != k:
117 if t[v] != k:
119 # file wasn't renamed back to itself
118 # file wasn't renamed back to itself
120 t[k] = t[v]
119 t[k] = t[v]
121 if v not in dst:
120 if v not in dst:
122 # chain was a rename, not a copy
121 # chain was a rename, not a copy
123 del t[v]
122 del t[v]
124 if v in src:
123 if v in src:
125 # file is a copy of an existing file
124 # file is a copy of an existing file
126 t[k] = v
125 t[k] = v
127
126
128 # remove criss-crossed copies
127 # remove criss-crossed copies
129 for k, v in list(t.items()):
128 for k, v in list(t.items()):
130 if k in src and v in dst:
129 if k in src and v in dst:
131 del t[k]
130 del t[k]
132
131
133 return t
132 return t
134
133
135 def _tracefile(fctx, am, limit=-1):
134 def _tracefile(fctx, am, limit=-1):
136 """return file context that is the ancestor of fctx present in ancestor
135 """return file context that is the ancestor of fctx present in ancestor
137 manifest am, stopping after the first ancestor lower than limit"""
136 manifest am, stopping after the first ancestor lower than limit"""
138
137
139 for f in fctx.ancestors():
138 for f in fctx.ancestors():
140 if am.get(f.path(), None) == f.filenode():
139 if am.get(f.path(), None) == f.filenode():
141 return f
140 return f
142 if limit >= 0 and not f.isintroducedafter(limit):
141 if limit >= 0 and not f.isintroducedafter(limit):
143 return None
142 return None
144
143
145 def _dirstatecopies(d, match=None):
144 def _dirstatecopies(d, match=None):
146 ds = d._repo.dirstate
145 ds = d._repo.dirstate
147 c = ds.copies().copy()
146 c = ds.copies().copy()
148 for k in list(c):
147 for k in list(c):
149 if ds[k] not in 'anm' or (match and not match(k)):
148 if ds[k] not in 'anm' or (match and not match(k)):
150 del c[k]
149 del c[k]
151 return c
150 return c
152
151
153 def _computeforwardmissing(a, b, match=None):
152 def _computeforwardmissing(a, b, match=None):
154 """Computes which files are in b but not a.
153 """Computes which files are in b but not a.
155 This is its own function so extensions can easily wrap this call to see what
154 This is its own function so extensions can easily wrap this call to see what
156 files _forwardcopies is about to process.
155 files _forwardcopies is about to process.
157 """
156 """
158 ma = a.manifest()
157 ma = a.manifest()
159 mb = b.manifest()
158 mb = b.manifest()
160 return mb.filesnotin(ma, match=match)
159 return mb.filesnotin(ma, match=match)
161
160
162 def _committedforwardcopies(a, b, match):
161 def _committedforwardcopies(a, b, match):
163 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
162 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
164 # files might have to be traced back to the fctx parent of the last
163 # files might have to be traced back to the fctx parent of the last
165 # one-side-only changeset, but not further back than that
164 # one-side-only changeset, but not further back than that
166 repo = a._repo
165 repo = a._repo
167 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
166 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
168 dbg = repo.ui.debug
167 dbg = repo.ui.debug
169 if debug:
168 if debug:
170 dbg('debug.copies: looking into rename from %s to %s\n'
169 dbg('debug.copies: looking into rename from %s to %s\n'
171 % (a, b))
170 % (a, b))
172 limit = _findlimit(repo, a.rev(), b.rev())
171 limit = _findlimit(repo, a.rev(), b.rev())
173 if limit is None:
172 if limit is None:
174 limit = -1
173 limit = -1
175 if debug:
174 if debug:
176 dbg('debug.copies: search limit: %d\n' % limit)
175 dbg('debug.copies: search limit: %d\n' % limit)
177 am = a.manifest()
176 am = a.manifest()
178
177
179 # find where new files came from
178 # find where new files came from
180 # we currently don't try to find where old files went, too expensive
179 # we currently don't try to find where old files went, too expensive
181 # this means we can miss a case like 'hg rm b; hg cp a b'
180 # this means we can miss a case like 'hg rm b; hg cp a b'
182 cm = {}
181 cm = {}
183
182
184 # Computing the forward missing is quite expensive on large manifests, since
183 # Computing the forward missing is quite expensive on large manifests, since
185 # it compares the entire manifests. We can optimize it in the common use
184 # it compares the entire manifests. We can optimize it in the common use
186 # case of computing what copies are in a commit versus its parent (like
185 # case of computing what copies are in a commit versus its parent (like
187 # during a rebase or histedit). Note, we exclude merge commits from this
186 # during a rebase or histedit). Note, we exclude merge commits from this
188 # optimization, since the ctx.files() for a merge commit is not correct for
187 # optimization, since the ctx.files() for a merge commit is not correct for
189 # this comparison.
188 # this comparison.
190 forwardmissingmatch = match
189 forwardmissingmatch = match
191 if b.p1() == a and b.p2().node() == node.nullid:
190 if b.p1() == a and b.p2().node() == node.nullid:
192 filesmatcher = scmutil.matchfiles(a._repo, b.files())
191 filesmatcher = scmutil.matchfiles(a._repo, b.files())
193 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
192 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
194 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
193 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
195
194
196 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
195 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
197
196
198 if debug:
197 if debug:
199 dbg('debug.copies: missing file to search: %d\n' % len(missing))
198 dbg('debug.copies: missing file to search: %d\n' % len(missing))
200
199
201 for f in missing:
200 for f in missing:
202 if debug:
201 if debug:
203 dbg('debug.copies: tracing file: %s\n' % f)
202 dbg('debug.copies: tracing file: %s\n' % f)
204 fctx = b[f]
203 fctx = b[f]
205 fctx._ancestrycontext = ancestrycontext
204 fctx._ancestrycontext = ancestrycontext
206
205
207 if debug:
206 if debug:
208 start = util.timer()
207 start = util.timer()
209 ofctx = _tracefile(fctx, am, limit)
208 ofctx = _tracefile(fctx, am, limit)
210 if ofctx:
209 if ofctx:
211 if debug:
210 if debug:
212 dbg('debug.copies: rename of: %s\n' % ofctx._path)
211 dbg('debug.copies: rename of: %s\n' % ofctx._path)
213 cm[f] = ofctx.path()
212 cm[f] = ofctx.path()
214 if debug:
213 if debug:
215 dbg('debug.copies: time: %f seconds\n'
214 dbg('debug.copies: time: %f seconds\n'
216 % (util.timer() - start))
215 % (util.timer() - start))
217 return cm
216 return cm
218
217
219 def _forwardcopies(a, b, match=None):
218 def _forwardcopies(a, b, match=None):
220 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
219 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
221
220
222 match = a.repo().narrowmatch(match)
221 match = a.repo().narrowmatch(match)
223 # check for working copy
222 # check for working copy
224 if b.rev() is None:
223 if b.rev() is None:
225 if a == b.p1():
224 if a == b.p1():
226 # short-circuit to avoid issues with merge states
225 # short-circuit to avoid issues with merge states
227 return _dirstatecopies(b, match)
226 return _dirstatecopies(b, match)
228
227
229 cm = _committedforwardcopies(a, b.p1(), match)
228 cm = _committedforwardcopies(a, b.p1(), match)
230 # combine copies from dirstate if necessary
229 # combine copies from dirstate if necessary
231 return _chain(a, b, cm, _dirstatecopies(b, match))
230 return _chain(a, b, cm, _dirstatecopies(b, match))
232 return _committedforwardcopies(a, b, match)
231 return _committedforwardcopies(a, b, match)
233
232
234 def _backwardrenames(a, b):
233 def _backwardrenames(a, b):
235 if a._repo.ui.config('experimental', 'copytrace') == 'off':
234 if a._repo.ui.config('experimental', 'copytrace') == 'off':
236 return {}
235 return {}
237
236
238 # Even though we're not taking copies into account, 1:n rename situations
237 # Even though we're not taking copies into account, 1:n rename situations
239 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
238 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
240 # arbitrarily pick one of the renames.
239 # arbitrarily pick one of the renames.
241 f = _forwardcopies(b, a)
240 f = _forwardcopies(b, a)
242 r = {}
241 r = {}
243 for k, v in sorted(f.iteritems()):
242 for k, v in sorted(f.iteritems()):
244 # remove copies
243 # remove copies
245 if v in a:
244 if v in a:
246 continue
245 continue
247 r[v] = k
246 r[v] = k
248 return r
247 return r
249
248
250 def pathcopies(x, y, match=None):
249 def pathcopies(x, y, match=None):
251 """find {dst@y: src@x} copy mapping for directed compare"""
250 """find {dst@y: src@x} copy mapping for directed compare"""
252 repo = x._repo
251 repo = x._repo
253 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
252 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
254 if debug:
253 if debug:
255 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
254 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
256 % (x, y))
255 % (x, y))
257 if x == y or not x or not y:
256 if x == y or not x or not y:
258 return {}
257 return {}
259 a = y.ancestor(x)
258 a = y.ancestor(x)
260 if a == x:
259 if a == x:
261 if debug:
260 if debug:
262 repo.ui.debug('debug.copies: search mode: forward\n')
261 repo.ui.debug('debug.copies: search mode: forward\n')
263 return _forwardcopies(x, y, match=match)
262 return _forwardcopies(x, y, match=match)
264 if a == y:
263 if a == y:
265 if debug:
264 if debug:
266 repo.ui.debug('debug.copies: search mode: backward\n')
265 repo.ui.debug('debug.copies: search mode: backward\n')
267 return _backwardrenames(x, y)
266 return _backwardrenames(x, y)
268 if debug:
267 if debug:
269 repo.ui.debug('debug.copies: search mode: combined\n')
268 repo.ui.debug('debug.copies: search mode: combined\n')
270 return _chain(x, y, _backwardrenames(x, a),
269 return _chain(x, y, _backwardrenames(x, a),
271 _forwardcopies(a, y, match=match))
270 _forwardcopies(a, y, match=match))
272
271
273 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
272 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
274 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
273 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
275 and c2. This is its own function so extensions can easily wrap this call
274 and c2. This is its own function so extensions can easily wrap this call
276 to see what files mergecopies is about to process.
275 to see what files mergecopies is about to process.
277
276
278 Even though c1 and c2 are not used in this function, they are useful in
277 Even though c1 and c2 are not used in this function, they are useful in
279 other extensions for being able to read the file nodes of the changed files.
278 other extensions for being able to read the file nodes of the changed files.
280
279
281 "baselabel" can be passed to help distinguish the multiple computations
280 "baselabel" can be passed to help distinguish the multiple computations
282 done in the graft case.
281 done in the graft case.
283 """
282 """
284 u1 = sorted(addedinm1 - addedinm2)
283 u1 = sorted(addedinm1 - addedinm2)
285 u2 = sorted(addedinm2 - addedinm1)
284 u2 = sorted(addedinm2 - addedinm1)
286
285
287 header = " unmatched files in %s"
286 header = " unmatched files in %s"
288 if baselabel:
287 if baselabel:
289 header += ' (from %s)' % baselabel
288 header += ' (from %s)' % baselabel
290 if u1:
289 if u1:
291 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
290 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
292 if u2:
291 if u2:
293 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
292 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
294
293
295 return u1, u2
294 return u1, u2
296
295
297 def _makegetfctx(ctx):
296 def _makegetfctx(ctx):
298 """return a 'getfctx' function suitable for _checkcopies usage
297 """return a 'getfctx' function suitable for _checkcopies usage
299
298
300 We have to re-setup the function building 'filectx' for each
299 We have to re-setup the function building 'filectx' for each
301 '_checkcopies' to ensure the linkrev adjustment is properly setup for
300 '_checkcopies' to ensure the linkrev adjustment is properly setup for
302 each. Linkrev adjustment is important to avoid bug in rename
301 each. Linkrev adjustment is important to avoid bug in rename
303 detection. Moreover, having a proper '_ancestrycontext' setup ensures
302 detection. Moreover, having a proper '_ancestrycontext' setup ensures
304 the performance impact of this adjustment is kept limited. Without it,
303 the performance impact of this adjustment is kept limited. Without it,
305 each file could do a full dag traversal making the time complexity of
304 each file could do a full dag traversal making the time complexity of
306 the operation explode (see issue4537).
305 the operation explode (see issue4537).
307
306
308 This function exists here mostly to limit the impact on stable. Feel
307 This function exists here mostly to limit the impact on stable. Feel
309 free to refactor on default.
308 free to refactor on default.
310 """
309 """
311 rev = ctx.rev()
310 rev = ctx.rev()
312 repo = ctx._repo
311 repo = ctx._repo
313 ac = getattr(ctx, '_ancestrycontext', None)
312 ac = getattr(ctx, '_ancestrycontext', None)
314 if ac is None:
313 if ac is None:
315 revs = [rev]
314 revs = [rev]
316 if rev is None:
315 if rev is None:
317 revs = [p.rev() for p in ctx.parents()]
316 revs = [p.rev() for p in ctx.parents()]
318 ac = repo.changelog.ancestors(revs, inclusive=True)
317 ac = repo.changelog.ancestors(revs, inclusive=True)
319 ctx._ancestrycontext = ac
318 ctx._ancestrycontext = ac
320 def makectx(f, n):
319 def makectx(f, n):
321 if n in node.wdirfilenodeids: # in a working context?
320 if n in node.wdirfilenodeids: # in a working context?
322 if ctx.rev() is None:
321 if ctx.rev() is None:
323 return ctx.filectx(f)
322 return ctx.filectx(f)
324 return repo[None][f]
323 return repo[None][f]
325 fctx = repo.filectx(f, fileid=n)
324 fctx = repo.filectx(f, fileid=n)
326 # setup only needed for filectx not create from a changectx
325 # setup only needed for filectx not create from a changectx
327 fctx._ancestrycontext = ac
326 fctx._ancestrycontext = ac
328 fctx._descendantrev = rev
327 fctx._descendantrev = rev
329 return fctx
328 return fctx
330 return util.lrucachefunc(makectx)
329 return util.lrucachefunc(makectx)
331
330
332 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
331 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
333 """combine partial copy paths"""
332 """combine partial copy paths"""
334 remainder = {}
333 remainder = {}
335 for f in copyfrom:
334 for f in copyfrom:
336 if f in copyto:
335 if f in copyto:
337 finalcopy[copyto[f]] = copyfrom[f]
336 finalcopy[copyto[f]] = copyfrom[f]
338 del copyto[f]
337 del copyto[f]
339 for f in incompletediverge:
338 for f in incompletediverge:
340 assert f not in diverge
339 assert f not in diverge
341 ic = incompletediverge[f]
340 ic = incompletediverge[f]
342 if ic[0] in copyto:
341 if ic[0] in copyto:
343 diverge[f] = [copyto[ic[0]], ic[1]]
342 diverge[f] = [copyto[ic[0]], ic[1]]
344 else:
343 else:
345 remainder[f] = ic
344 remainder[f] = ic
346 return remainder
345 return remainder
347
346
348 def mergecopies(repo, c1, c2, base):
347 def mergecopies(repo, c1, c2, base):
349 """
348 """
350 The function calling different copytracing algorithms on the basis of config
349 The function calling different copytracing algorithms on the basis of config
351 which find moves and copies between context c1 and c2 that are relevant for
350 which find moves and copies between context c1 and c2 that are relevant for
352 merging. 'base' will be used as the merge base.
351 merging. 'base' will be used as the merge base.
353
352
354 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
353 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
355 files that were moved/ copied in one merge parent and modified in another.
354 files that were moved/ copied in one merge parent and modified in another.
356 For example:
355 For example:
357
356
358 o ---> 4 another commit
357 o ---> 4 another commit
359 |
358 |
360 | o ---> 3 commit that modifies a.txt
359 | o ---> 3 commit that modifies a.txt
361 | /
360 | /
362 o / ---> 2 commit that moves a.txt to b.txt
361 o / ---> 2 commit that moves a.txt to b.txt
363 |/
362 |/
364 o ---> 1 merge base
363 o ---> 1 merge base
365
364
366 If we try to rebase revision 3 on revision 4, since there is no a.txt in
365 If we try to rebase revision 3 on revision 4, since there is no a.txt in
367 revision 4, and if user have copytrace disabled, we prints the following
366 revision 4, and if user have copytrace disabled, we prints the following
368 message:
367 message:
369
368
370 ```other changed <file> which local deleted```
369 ```other changed <file> which local deleted```
371
370
372 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
371 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
373 "dirmove".
372 "dirmove".
374
373
375 "copy" is a mapping from destination name -> source name,
374 "copy" is a mapping from destination name -> source name,
376 where source is in c1 and destination is in c2 or vice-versa.
375 where source is in c1 and destination is in c2 or vice-versa.
377
376
378 "movewithdir" is a mapping from source name -> destination name,
377 "movewithdir" is a mapping from source name -> destination name,
379 where the file at source present in one context but not the other
378 where the file at source present in one context but not the other
380 needs to be moved to destination by the merge process, because the
379 needs to be moved to destination by the merge process, because the
381 other context moved the directory it is in.
380 other context moved the directory it is in.
382
381
383 "diverge" is a mapping of source name -> list of destination names
382 "diverge" is a mapping of source name -> list of destination names
384 for divergent renames.
383 for divergent renames.
385
384
386 "renamedelete" is a mapping of source name -> list of destination
385 "renamedelete" is a mapping of source name -> list of destination
387 names for files deleted in c1 that were renamed in c2 or vice-versa.
386 names for files deleted in c1 that were renamed in c2 or vice-versa.
388
387
389 "dirmove" is a mapping of detected source dir -> destination dir renames.
388 "dirmove" is a mapping of detected source dir -> destination dir renames.
390 This is needed for handling changes to new files previously grafted into
389 This is needed for handling changes to new files previously grafted into
391 renamed directories.
390 renamed directories.
392 """
391 """
393 # avoid silly behavior for update from empty dir
392 # avoid silly behavior for update from empty dir
394 if not c1 or not c2 or c1 == c2:
393 if not c1 or not c2 or c1 == c2:
395 return {}, {}, {}, {}, {}
394 return {}, {}, {}, {}, {}
396
395
397 # avoid silly behavior for parent -> working dir
396 # avoid silly behavior for parent -> working dir
398 if c2.node() is None and c1.node() == repo.dirstate.p1():
397 if c2.node() is None and c1.node() == repo.dirstate.p1():
399 return repo.dirstate.copies(), {}, {}, {}, {}
398 return repo.dirstate.copies(), {}, {}, {}, {}
400
399
401 copytracing = repo.ui.config('experimental', 'copytrace')
400 copytracing = repo.ui.config('experimental', 'copytrace')
402 boolctrace = stringutil.parsebool(copytracing)
401 boolctrace = stringutil.parsebool(copytracing)
403
402
404 # Copy trace disabling is explicitly below the node == p1 logic above
403 # Copy trace disabling is explicitly below the node == p1 logic above
405 # because the logic above is required for a simple copy to be kept across a
404 # because the logic above is required for a simple copy to be kept across a
406 # rebase.
405 # rebase.
407 if copytracing == 'heuristics':
406 if copytracing == 'heuristics':
408 # Do full copytracing if only non-public revisions are involved as
407 # Do full copytracing if only non-public revisions are involved as
409 # that will be fast enough and will also cover the copies which could
408 # that will be fast enough and will also cover the copies which could
410 # be missed by heuristics
409 # be missed by heuristics
411 if _isfullcopytraceable(repo, c1, base):
410 if _isfullcopytraceable(repo, c1, base):
412 return _fullcopytracing(repo, c1, c2, base)
411 return _fullcopytracing(repo, c1, c2, base)
413 return _heuristicscopytracing(repo, c1, c2, base)
412 return _heuristicscopytracing(repo, c1, c2, base)
414 elif boolctrace is False:
413 elif boolctrace is False:
415 # stringutil.parsebool() returns None when it is unable to parse the
414 # stringutil.parsebool() returns None when it is unable to parse the
416 # value, so we should rely on making sure copytracing is on such cases
415 # value, so we should rely on making sure copytracing is on such cases
417 return {}, {}, {}, {}, {}
416 return {}, {}, {}, {}, {}
418 else:
417 else:
419 return _fullcopytracing(repo, c1, c2, base)
418 return _fullcopytracing(repo, c1, c2, base)
420
419
421 def _isfullcopytraceable(repo, c1, base):
420 def _isfullcopytraceable(repo, c1, base):
422 """ Checks that if base, source and destination are all no-public branches,
421 """ Checks that if base, source and destination are all no-public branches,
423 if yes let's use the full copytrace algorithm for increased capabilities
422 if yes let's use the full copytrace algorithm for increased capabilities
424 since it will be fast enough.
423 since it will be fast enough.
425
424
426 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
425 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
427 number of changesets from c1 to base such that if number of changesets are
426 number of changesets from c1 to base such that if number of changesets are
428 more than the limit, full copytracing algorithm won't be used.
427 more than the limit, full copytracing algorithm won't be used.
429 """
428 """
430 if c1.rev() is None:
429 if c1.rev() is None:
431 c1 = c1.p1()
430 c1 = c1.p1()
432 if c1.mutable() and base.mutable():
431 if c1.mutable() and base.mutable():
433 sourcecommitlimit = repo.ui.configint('experimental',
432 sourcecommitlimit = repo.ui.configint('experimental',
434 'copytrace.sourcecommitlimit')
433 'copytrace.sourcecommitlimit')
435 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
434 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
436 return commits < sourcecommitlimit
435 return commits < sourcecommitlimit
437 return False
436 return False
438
437
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple of dicts: (copy, movewithdir, diverge, renamedelete,
    dirmove) -- copy and movewithdir map destination filenames to source
    filenames, diverge and renamedelete map a source filename to the list of
    destinations it diverged into, and dirmove maps moved source directories
    (with trailing '/') to their destination directories.
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden We still need to know a real common ancestor in this case We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.isancestorof(wc) is False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not base.isancestorof(_c1)
    dirtyc2 = not base.isancestorof(_c2)
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}, {}
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    # NOTE: data1/data2 share the 'diverge' and 'incompletediverge' dicts
    # above, so _checkcopies calls on either side accumulate into them.
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2,
                                      m1.filesnotin(mta, repo.narrowmatch()),
                                      m2.filesnotin(mta, repo.narrowmatch()),
                                      baselabel='topological common ancestor')

    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    else:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    # iterate over a copy (list(...)) because entries are deleted inline
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize keys/values to directory prefixes with trailing slash
    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(("   pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                        break

    return copy, movewithdir, diverge, renamedelete, dirmove
673
672
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same 5-tuple shape as _fullcopytracing(); only the first
    element (the copy dict) is ever populated by the heuristics themselves.
    """

    # work from the first real (committed) parents when given working contexts
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        # index the files added on the c1 side by basename and by dirname,
        # so each missing file only inspects plausible rename candidates
        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (read once here: the value is loop-invariant, so there is no point
        # re-querying the config for every missing file)
        maxcandidates = repo.ui.configint('experimental',
                                          'copytrace.movecandidateslimit')

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
774
773
775 def _related(f1, f2):
774 def _related(f1, f2):
776 """return True if f1 and f2 filectx have a common ancestor
775 """return True if f1 and f2 filectx have a common ancestor
777
776
778 Walk back to common ancestor to see if the two files originate
777 Walk back to common ancestor to see if the two files originate
779 from the same file. Since workingfilectx's rev() is None it messes
778 from the same file. Since workingfilectx's rev() is None it messes
780 up the integer comparison logic, hence the pre-step check for
779 up the integer comparison logic, hence the pre-step check for
781 None (f1 and f2 can only be workingfilectx's initially).
780 None (f1 and f2 can only be workingfilectx's initially).
782 """
781 """
783
782
784 if f1 == f2:
783 if f1 == f2:
785 return f1 # a match
784 return f1 # a match
786
785
787 g1, g2 = f1.ancestors(), f2.ancestors()
786 g1, g2 = f1.ancestors(), f2.ancestors()
788 try:
787 try:
789 f1r, f2r = f1.linkrev(), f2.linkrev()
788 f1r, f2r = f1.linkrev(), f2.linkrev()
790
789
791 if f1r is None:
790 if f1r is None:
792 f1 = next(g1)
791 f1 = next(g1)
793 if f2r is None:
792 if f2r is None:
794 f2 = next(g2)
793 f2 = next(g2)
795
794
796 while True:
795 while True:
797 f1r, f2r = f1.linkrev(), f2.linkrev()
796 f1r, f2r = f1.linkrev(), f2.linkrev()
798 if f1r > f2r:
797 if f1r > f2r:
799 f1 = next(g1)
798 f1 = next(g1)
800 elif f2r > f1r:
799 elif f2r > f1r:
801 f2 = next(g2)
800 f2 = next(g2)
802 else: # f1 and f2 point to files in the same linkrev
801 else: # f1 and f2 point to files in the same linkrev
803 return f1 == f2 # true if they point to the same file
802 return f1 == f2 # true if they point to the same file
804 except StopIteration:
803 except StopIteration:
805 return False
804 return False
806
805
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    Returns None; results are accumulated by mutating the 'copy',
    'fullcopy', 'incomplete', 'diverge' and 'incompletediverge'
    sub-dictionaries of `data` in place.

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    # seen holds every path name f was known under while walking ancestors
    seen = {f}
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if oc.linkrev() < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

    # ancestor walk exhausted (or limit hit) without a full match; record
    # partial results when the last name seen exists in the topological CA
    if of in mta:
        if backwards or remotebase:
            data['incomplete'][of] = f
        else:
            for sf in seen:
                if sf in mb:
                    if tca == base:
                        data['diverge'].setdefault(sf, []).append(f)
                    else:
                        data['incompletediverge'][sf] = [of, f]
                    return
899
898
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    cfg = repo.ui.config('experimental', 'copytrace')
    parsed = stringutil.parsebool(cfg)
    # copytracing is active unless the config is an explicit "off"
    tracing = cfg == 'heuristics' or parsed or parsed is None
    if skiprev is not None and tracing:
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst not in exclude:
            wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now