upstream/mercurial-mirror Commit - r41918:012f6955

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import collections

10

import collections

11

import heapq

11

import heapq

12

import os

12

import os

13

14

from .i18n import _

14

from .i18n import _

15

16

from . import (

16

from . import (

17

match as matchmod,

17

match as matchmod,

18

node,

18

node,

19

pathutil,

19

pathutil,

20

scmutil,

20

scmutil,

21

util,

21

util,

22

)

22

)

23

from .utils import (

23

from .utils import (

24

stringutil,

24

stringutil,

25

)

25

)

26

27

def _findlimit(repo, ctxa, ctxb):

27

def _findlimit(repo, ctxa, ctxb):

28

"""

28

"""

29

Find the last revision that needs to be checked to ensure that a full

29

Find the last revision that needs to be checked to ensure that a full

30

transitive closure for file copies can be properly calculated.

30

transitive closure for file copies can be properly calculated.

31

Generally, this means finding the earliest revision number that's an

31

Generally, this means finding the earliest revision number that's an

32

ancestor of a or b but not both, except when a or b is a direct descendent

32

ancestor of a or b but not both, except when a or b is a direct descendent

33

of the other, in which case we can return the minimum revnum of a and b.

33

of the other, in which case we can return the minimum revnum of a and b.

34

"""

34

"""

35

36

# basic idea:

36

# basic idea:

37

# - mark a and b with different sides

37

# - mark a and b with different sides

38

# - if a parent's children are all on the same side, the parent is

38

# - if a parent's children are all on the same side, the parent is

39

# on that side, otherwise it is on no side

39

# on that side, otherwise it is on no side

40

# - walk the graph in topological order with the help of a heap;

40

# - walk the graph in topological order with the help of a heap;

41

# - add unseen parents to side map

41

# - add unseen parents to side map

42

# - clear side of any parent that has children on different sides

42

# - clear side of any parent that has children on different sides

43

# - track number of interesting revs that might still be on a side

43

# - track number of interesting revs that might still be on a side

44

# - track the lowest interesting rev seen

44

# - track the lowest interesting rev seen

45

# - quit when interesting revs is zero

45

# - quit when interesting revs is zero

46

47

cl = repo.changelog

47

cl = repo.changelog

48

wdirparents = None

48

wdirparents = None

49

a = ctxa.rev()

49

a = ctxa.rev()

50

b = ctxb.rev()

50

b = ctxb.rev()

51

if a is None:

51

if a is None:

52

wdirparents = (ctxa.p1(), ctxa.p2())

52

wdirparents = (ctxa.p1(), ctxa.p2())

53

a = node.wdirrev

53

a = node.wdirrev

54

if b is None:

54

if b is None:

55

assert not wdirparents

55

assert not wdirparents

56

wdirparents = (ctxb.p1(), ctxb.p2())

56

wdirparents = (ctxb.p1(), ctxb.p2())

57

b = node.wdirrev

57

b = node.wdirrev

58

59

side = {a: -1, b: 1}

59

side = {a: -1, b: 1}

60

visit = [-a, -b]

60

visit = [-a, -b]

61

heapq.heapify(visit)

61

heapq.heapify(visit)

62

interesting = len(visit)

62

interesting = len(visit)

63

limit = node.wdirrev

63

limit = node.wdirrev

64

65

while interesting:

65

while interesting:

66

r = -heapq.heappop(visit)

66

r = -heapq.heappop(visit)

67

if r == node.wdirrev:

67

if r == node.wdirrev:

68

parents = [pctx.rev() for pctx in wdirparents]

68

parents = [pctx.rev() for pctx in wdirparents]

69

else:

69

else:

70

parents = cl.parentrevs(r)

70

parents = cl.parentrevs(r)

71

if parents[1] == node.nullrev:

71

if parents[1] == node.nullrev:

72

parents = parents[:1]

72

parents = parents[:1]

73

for p in parents:

73

for p in parents:

74

if p not in side:

74

if p not in side:

75

# first time we see p; add it to visit

75

# first time we see p; add it to visit

76

side[p] = side[r]

76

side[p] = side[r]

77

if side[p]:

77

if side[p]:

78

interesting += 1

78

interesting += 1

79

heapq.heappush(visit, -p)

79

heapq.heappush(visit, -p)

80

elif side[p] and side[p] != side[r]:

80

elif side[p] and side[p] != side[r]:

81

# p was interesting but now we know better

81

# p was interesting but now we know better

82

side[p] = 0

82

side[p] = 0

83

interesting -= 1

83

interesting -= 1

84

if side[r]:

84

if side[r]:

85

limit = r # lowest rev visited

85

limit = r # lowest rev visited

86

interesting -= 1

86

interesting -= 1

87

88

# Consider the following flow (see test-commit-amend.t under issue4405):

88

# Consider the following flow (see test-commit-amend.t under issue4405):

89

# 1/ File 'a0' committed

89

# 1/ File 'a0' committed

90

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

90

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

91

# 3/ Move back to first commit

91

# 3/ Move back to first commit

92

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

92

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

93

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

93

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

94

#

94

#

95

# During the amend in step five, we will be in this state:

95

# During the amend in step five, we will be in this state:

96

#

96

#

97

# @ 3 temporary amend commit for a1-amend

97

# @ 3 temporary amend commit for a1-amend

98

# |

98

# |

99

# o 2 a1-amend

99

# o 2 a1-amend

100

# |

100

# |

101

# | o 1 a1

101

# | o 1 a1

102

# |/

102

# |/

103

# o 0 a0

103

# o 0 a0

104

#

104

#

105

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

105

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

106

# yet the filelog has the copy information in rev 1 and we will not look

106

# yet the filelog has the copy information in rev 1 and we will not look

107

# back far enough unless we also look at the a and b as candidates.

107

# back far enough unless we also look at the a and b as candidates.

108

# This only occurs when a is a descendent of b or visa-versa.

108

# This only occurs when a is a descendent of b or visa-versa.

109

return min(limit, a, b)

109

return min(limit, a, b)

110

111

def _chain(src, dst, a, b):

111

def _chain(src, dst, a, b):

112

"""chain two sets of copies a->b"""

112

"""chain two sets of copies a->b"""

113

t = a.copy()

113

t = a.copy()

114

for k, v in b.iteritems():

114

for k, v in b.iteritems():

115

if v in t:

115

if v in t:

116

# found a chain

116

# found a chain

117

if t[v] != k:

117

if t[v] != k:

118

# file wasn't renamed back to itself

118

# file wasn't renamed back to itself

119

t[k] = t[v]

119

t[k] = t[v]

120

if v not in dst:

120

if v not in dst:

121

# chain was a rename, not a copy

121

# chain was a rename, not a copy

122

del t[v]

122

del t[v]

123

if v in src:

123

if v in src:

124

# file is a copy of an existing file

124

# file is a copy of an existing file

125

t[k] = v

125

t[k] = v

126

127

# remove criss-crossed copies

127

# remove criss-crossed copies

128

for k, v in list(t.items()):

128

for k, v in list(t.items()):

129

if k in src and v in dst:

129

if k in src and v in dst:

130

del t[k]

130

del t[k]

131

132

return t

132

return t

133

134

def _tracefile(fctx, am, limit=node.nullrev):

134

def _tracefile(fctx, am, limit=node.nullrev):

135

"""return file context that is the ancestor of fctx present in ancestor

135

"""return file context that is the ancestor of fctx present in ancestor

136

manifest am, stopping after the first ancestor lower than limit"""

136

manifest am, stopping after the first ancestor lower than limit"""

137

138

for f in fctx.ancestors():

138

for f in fctx.ancestors():

139

if am.get(f.path(), None) == f.filenode():

139

if am.get(f.path(), None) == f.filenode():

140

return f

140

return f

141

if limit >= 0 and not f.isintroducedafter(limit):

141

if limit >= 0 and not f.isintroducedafter(limit):

142

return None

142

return None

143

144

def _dirstatecopies(d, match=None):

144

def _dirstatecopies(repo, match=None):

145

ds = d._repo.dirstate

145

ds = repo.dirstate

146

c = ds.copies().copy()

146

c = ds.copies().copy()

147

for k in list(c):

147

for k in list(c):

148

if ds[k] not in 'anm' or (match and not match(k)):

148

if ds[k] not in 'anm' or (match and not match(k)):

149

del c[k]

149

del c[k]

150

return c

150

return c

151

152

def _computeforwardmissing(a, b, match=None):

152

def _computeforwardmissing(a, b, match=None):

153

"""Computes which files are in b but not a.

153

"""Computes which files are in b but not a.

154

This is its own function so extensions can easily wrap this call to see what

154

This is its own function so extensions can easily wrap this call to see what

155

files _forwardcopies is about to process.

155

files _forwardcopies is about to process.

156

"""

156

"""

157

ma = a.manifest()

157

ma = a.manifest()

158

mb = b.manifest()

158

mb = b.manifest()

159

return mb.filesnotin(ma, match=match)

159

return mb.filesnotin(ma, match=match)

160

161

def _committedforwardcopies(a, b, match):

161

def _committedforwardcopies(a, b, match):

162

"""Like _forwardcopies(), but b.rev() cannot be None (working copy)"""

162

"""Like _forwardcopies(), but b.rev() cannot be None (working copy)"""

163

# files might have to be traced back to the fctx parent of the last

163

# files might have to be traced back to the fctx parent of the last

164

# one-side-only changeset, but not further back than that

164

# one-side-only changeset, but not further back than that

165

repo = a._repo

165

repo = a._repo

166

debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')

166

debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')

167

dbg = repo.ui.debug

167

dbg = repo.ui.debug

168

if debug:

168

if debug:

169

dbg('debug.copies: looking into rename from %s to %s\n'

169

dbg('debug.copies: looking into rename from %s to %s\n'

170

% (a, b))

170

% (a, b))

171

limit = _findlimit(repo, a, b)

171

limit = _findlimit(repo, a, b)

172

if debug:

172

if debug:

173

dbg('debug.copies: search limit: %d\n' % limit)

173

dbg('debug.copies: search limit: %d\n' % limit)

174

am = a.manifest()

174

am = a.manifest()

175

176

# find where new files came from

176

# find where new files came from

177

# we currently don't try to find where old files went, too expensive

177

# we currently don't try to find where old files went, too expensive

178

# this means we can miss a case like 'hg rm b; hg cp a b'

178

# this means we can miss a case like 'hg rm b; hg cp a b'

179

cm = {}

179

cm = {}

180

181

# Computing the forward missing is quite expensive on large manifests, since

181

# Computing the forward missing is quite expensive on large manifests, since

182

# it compares the entire manifests. We can optimize it in the common use

182

# it compares the entire manifests. We can optimize it in the common use

183

# case of computing what copies are in a commit versus its parent (like

183

# case of computing what copies are in a commit versus its parent (like

184

# during a rebase or histedit). Note, we exclude merge commits from this

184

# during a rebase or histedit). Note, we exclude merge commits from this

185

# optimization, since the ctx.files() for a merge commit is not correct for

185

# optimization, since the ctx.files() for a merge commit is not correct for

186

# this comparison.

186

# this comparison.

187

forwardmissingmatch = match

187

forwardmissingmatch = match

188

if b.p1() == a and b.p2().node() == node.nullid:

188

if b.p1() == a and b.p2().node() == node.nullid:

189

filesmatcher = scmutil.matchfiles(a._repo, b.files())

189

filesmatcher = scmutil.matchfiles(a._repo, b.files())

190

forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)

190

forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)

191

missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

191

missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

192

193

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

193

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

194

195

if debug:

195

if debug:

196

dbg('debug.copies: missing file to search: %d\n' % len(missing))

196

dbg('debug.copies: missing file to search: %d\n' % len(missing))

197

198

for f in missing:

198

for f in missing:

199

if debug:

199

if debug:

200

dbg('debug.copies: tracing file: %s\n' % f)

200

dbg('debug.copies: tracing file: %s\n' % f)

201

fctx = b[f]

201

fctx = b[f]

202

fctx._ancestrycontext = ancestrycontext

202

fctx._ancestrycontext = ancestrycontext

203

204

if debug:

204

if debug:

205

start = util.timer()

205

start = util.timer()

206

ofctx = _tracefile(fctx, am, limit)

206

ofctx = _tracefile(fctx, am, limit)

207

if ofctx:

207

if ofctx:

208

if debug:

208

if debug:

209

dbg('debug.copies: rename of: %s\n' % ofctx._path)

209

dbg('debug.copies: rename of: %s\n' % ofctx._path)

210

cm[f] = ofctx.path()

210

cm[f] = ofctx.path()

211

if debug:

211

if debug:

212

dbg('debug.copies: time: %f seconds\n'

212

dbg('debug.copies: time: %f seconds\n'

213

% (util.timer() - start))

213

% (util.timer() - start))

214

return cm

214

return cm

215

216

def _forwardcopies(a, b, match=None):

216

def _forwardcopies(a, b, match=None):

217

"""find {dst@b: src@a} copy mapping where a is an ancestor of b"""

217

"""find {dst@b: src@a} copy mapping where a is an ancestor of b"""

218

219

match = a.repo().narrowmatch(match)

219

match = a.repo().narrowmatch(match)

220

# check for working copy

220

# check for working copy

221

if b.rev() is None:

221

if b.rev() is None:

222

if a == b.p1():

222

if a == b.p1():

223

# short-circuit to avoid issues with merge states

223

# short-circuit to avoid issues with merge states

224

return _dirstatecopies(b, match)

224

return _dirstatecopies(b._repo, match)

225

226

cm = _committedforwardcopies(a, b.p1(), match)

226

cm = _committedforwardcopies(a, b.p1(), match)

227

# combine copies from dirstate if necessary

227

# combine copies from dirstate if necessary

228

return _chain(a, b, cm, _dirstatecopies(b, match))

228

return _chain(a, b, cm, _dirstatecopies(b._repo, match))

229

return _committedforwardcopies(a, b, match)

229

return _committedforwardcopies(a, b, match)

230

231

def _backwardrenames(a, b):

231

def _backwardrenames(a, b):

232

if a._repo.ui.config('experimental', 'copytrace') == 'off':

232

if a._repo.ui.config('experimental', 'copytrace') == 'off':

233

return {}

233

return {}

234

235

# Even though we're not taking copies into account, 1:n rename situations

235

# Even though we're not taking copies into account, 1:n rename situations

236

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

236

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

237

# arbitrarily pick one of the renames.

237

# arbitrarily pick one of the renames.

238

f = _forwardcopies(b, a)

238

f = _forwardcopies(b, a)

239

r = {}

239

r = {}

240

for k, v in sorted(f.iteritems()):

240

for k, v in sorted(f.iteritems()):

241

# remove copies

241

# remove copies

242

if v in a:

242

if v in a:

243

continue

243

continue

244

r[v] = k

244

r[v] = k

245

return r

245

return r

246

247

def pathcopies(x, y, match=None):

247

def pathcopies(x, y, match=None):

248

"""find {dst@y: src@x} copy mapping for directed compare"""

248

"""find {dst@y: src@x} copy mapping for directed compare"""

249

repo = x._repo

249

repo = x._repo

250

debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')

250

debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')

251

if debug:

251

if debug:

252

repo.ui.debug('debug.copies: searching copies from %s to %s\n'

252

repo.ui.debug('debug.copies: searching copies from %s to %s\n'

253

% (x, y))

253

% (x, y))

254

if x == y or not x or not y:

254

if x == y or not x or not y:

255

return {}

255

return {}

256

a = y.ancestor(x)

256

a = y.ancestor(x)

257

if a == x:

257

if a == x:

258

if debug:

258

if debug:

259

repo.ui.debug('debug.copies: search mode: forward\n')

259

repo.ui.debug('debug.copies: search mode: forward\n')

260

return _forwardcopies(x, y, match=match)

260

return _forwardcopies(x, y, match=match)

261

if a == y:

261

if a == y:

262

if debug:

262

if debug:

263

repo.ui.debug('debug.copies: search mode: backward\n')

263

repo.ui.debug('debug.copies: search mode: backward\n')

264

return _backwardrenames(x, y)

264

return _backwardrenames(x, y)

265

if debug:

265

if debug:

266

repo.ui.debug('debug.copies: search mode: combined\n')

266

repo.ui.debug('debug.copies: search mode: combined\n')

267

return _chain(x, y, _backwardrenames(x, a),

267

return _chain(x, y, _backwardrenames(x, a),

268

_forwardcopies(a, y, match=match))

268

_forwardcopies(a, y, match=match))

269

270

def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):

270

def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):

271

"""Computes, based on addedinm1 and addedinm2, the files exclusive to c1

271

"""Computes, based on addedinm1 and addedinm2, the files exclusive to c1

272

and c2. This is its own function so extensions can easily wrap this call

272

and c2. This is its own function so extensions can easily wrap this call

273

to see what files mergecopies is about to process.

273

to see what files mergecopies is about to process.

274

275

Even though c1 and c2 are not used in this function, they are useful in

275

Even though c1 and c2 are not used in this function, they are useful in

276

other extensions for being able to read the file nodes of the changed files.

276

other extensions for being able to read the file nodes of the changed files.

277

278

"baselabel" can be passed to help distinguish the multiple computations

278

"baselabel" can be passed to help distinguish the multiple computations

279

done in the graft case.

279

done in the graft case.

280

"""

280

"""

281

u1 = sorted(addedinm1 - addedinm2)

281

u1 = sorted(addedinm1 - addedinm2)

282

u2 = sorted(addedinm2 - addedinm1)

282

u2 = sorted(addedinm2 - addedinm1)

283

284

header = " unmatched files in %s"

284

header = " unmatched files in %s"

285

if baselabel:

285

if baselabel:

286

header += ' (from %s)' % baselabel

286

header += ' (from %s)' % baselabel

287

if u1:

287

if u1:

288

repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))

288

repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))

289

if u2:

289

if u2:

290

repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))

290

repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))

291

292

return u1, u2

292

return u1, u2

293

294

def _makegetfctx(ctx):

294

def _makegetfctx(ctx):

295

"""return a 'getfctx' function suitable for _checkcopies usage

295

"""return a 'getfctx' function suitable for _checkcopies usage

296

297

We have to re-setup the function building 'filectx' for each

297

We have to re-setup the function building 'filectx' for each

298

'_checkcopies' to ensure the linkrev adjustment is properly setup for

298

'_checkcopies' to ensure the linkrev adjustment is properly setup for

299

each. Linkrev adjustment is important to avoid bug in rename

299

each. Linkrev adjustment is important to avoid bug in rename

300

detection. Moreover, having a proper '_ancestrycontext' setup ensures

300

detection. Moreover, having a proper '_ancestrycontext' setup ensures

301

the performance impact of this adjustment is kept limited. Without it,

301

the performance impact of this adjustment is kept limited. Without it,

302

each file could do a full dag traversal making the time complexity of

302

each file could do a full dag traversal making the time complexity of

303

the operation explode (see issue4537).

303

the operation explode (see issue4537).

304

305

This function exists here mostly to limit the impact on stable. Feel

305

This function exists here mostly to limit the impact on stable. Feel

306

free to refactor on default.

306

free to refactor on default.

307

"""

307

"""

308

rev = ctx.rev()

308

rev = ctx.rev()

309

repo = ctx._repo

309

repo = ctx._repo

310

ac = getattr(ctx, '_ancestrycontext', None)

310

ac = getattr(ctx, '_ancestrycontext', None)

311

if ac is None:

311

if ac is None:

312

revs = [rev]

312

revs = [rev]

313

if rev is None:

313

if rev is None:

314

revs = [p.rev() for p in ctx.parents()]

314

revs = [p.rev() for p in ctx.parents()]

315

ac = repo.changelog.ancestors(revs, inclusive=True)

315

ac = repo.changelog.ancestors(revs, inclusive=True)

316

ctx._ancestrycontext = ac

316

ctx._ancestrycontext = ac

317

def makectx(f, n):

317

def makectx(f, n):

318

if n in node.wdirfilenodeids: # in a working context?

318

if n in node.wdirfilenodeids: # in a working context?

319

if ctx.rev() is None:

319

if ctx.rev() is None:

320

return ctx.filectx(f)

320

return ctx.filectx(f)

321

return repo[None][f]

321

return repo[None][f]

322

fctx = repo.filectx(f, fileid=n)

322

fctx = repo.filectx(f, fileid=n)

323

# setup only needed for filectx not create from a changectx

323

# setup only needed for filectx not create from a changectx

324

fctx._ancestrycontext = ac

324

fctx._ancestrycontext = ac

325

fctx._descendantrev = rev

325

fctx._descendantrev = rev

326

return fctx

326

return fctx

327

return util.lrucachefunc(makectx)

327

return util.lrucachefunc(makectx)

328

329

def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):

329

def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):

330

"""combine partial copy paths"""

330

"""combine partial copy paths"""

331

remainder = {}

331

remainder = {}

332

for f in copyfrom:

332

for f in copyfrom:

333

if f in copyto:

333

if f in copyto:

334

finalcopy[copyto[f]] = copyfrom[f]

334

finalcopy[copyto[f]] = copyfrom[f]

335

del copyto[f]

335

del copyto[f]

336

for f in incompletediverge:

336

for f in incompletediverge:

337

assert f not in diverge

337

assert f not in diverge

338

ic = incompletediverge[f]

338

ic = incompletediverge[f]

339

if ic[0] in copyto:

339

if ic[0] in copyto:

340

diverge[f] = [copyto[ic[0]], ic[1]]

340

diverge[f] = [copyto[ic[0]], ic[1]]

341

else:

341

else:

342

remainder[f] = ic

342

remainder[f] = ic

343

return remainder

343

return remainder

344

345

def mergecopies(repo, c1, c2, base):

345

def mergecopies(repo, c1, c2, base):

346

"""

346

"""

347

The function calling different copytracing algorithms on the basis of config

347

The function calling different copytracing algorithms on the basis of config

348

which find moves and copies between context c1 and c2 that are relevant for

348

which find moves and copies between context c1 and c2 that are relevant for

349

merging. 'base' will be used as the merge base.

349

merging. 'base' will be used as the merge base.

350

351

Copytracing is used in commands like rebase, merge, unshelve, etc to merge

351

Copytracing is used in commands like rebase, merge, unshelve, etc to merge

352

files that were moved/ copied in one merge parent and modified in another.

352

files that were moved/ copied in one merge parent and modified in another.

353

For example:

353

For example:

354

355

o ---> 4 another commit

355

o ---> 4 another commit

356

|

356

|

357

| o ---> 3 commit that modifies a.txt

357

| o ---> 3 commit that modifies a.txt

358

| /

358

| /

359

o / ---> 2 commit that moves a.txt to b.txt

359

o / ---> 2 commit that moves a.txt to b.txt

360

|/

360

|/

361

o ---> 1 merge base

361

o ---> 1 merge base

362

363

If we try to rebase revision 3 on revision 4, since there is no a.txt in

363

If we try to rebase revision 3 on revision 4, since there is no a.txt in

364

revision 4, and if user have copytrace disabled, we prints the following

364

revision 4, and if user have copytrace disabled, we prints the following

365

message:

365

message:

366

367

```other changed <file> which local deleted```

367

```other changed <file> which local deleted```

368

369

Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and

369

Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and

370

"dirmove".

370

"dirmove".

371

372

"copy" is a mapping from destination name -> source name,

372

"copy" is a mapping from destination name -> source name,

373

where source is in c1 and destination is in c2 or vice-versa.

373

where source is in c1 and destination is in c2 or vice-versa.

374

375

"movewithdir" is a mapping from source name -> destination name,

375

"movewithdir" is a mapping from source name -> destination name,

376

where the file at source present in one context but not the other

376

where the file at source present in one context but not the other

377

needs to be moved to destination by the merge process, because the

377

needs to be moved to destination by the merge process, because the

378

other context moved the directory it is in.

378

other context moved the directory it is in.

379

380

"diverge" is a mapping of source name -> list of destination names

380

"diverge" is a mapping of source name -> list of destination names

381

for divergent renames.

381

for divergent renames.

382

383

"renamedelete" is a mapping of source name -> list of destination

383

"renamedelete" is a mapping of source name -> list of destination

384

names for files deleted in c1 that were renamed in c2 or vice-versa.

384

names for files deleted in c1 that were renamed in c2 or vice-versa.

385

386

"dirmove" is a mapping of detected source dir -> destination dir renames.

386

"dirmove" is a mapping of detected source dir -> destination dir renames.

387

This is needed for handling changes to new files previously grafted into

387

This is needed for handling changes to new files previously grafted into

388

renamed directories.

388

renamed directories.

389

"""

389

"""

390

# avoid silly behavior for update from empty dir

390

# avoid silly behavior for update from empty dir

391

if not c1 or not c2 or c1 == c2:

391

if not c1 or not c2 or c1 == c2:

392

return {}, {}, {}, {}, {}

392

return {}, {}, {}, {}, {}

393

394

narrowmatch = c1.repo().narrowmatch()

395

394

# avoid silly behavior for parent -> working dir

396

# avoid silly behavior for parent -> working dir

395

if c2.node() is None and c1.node() == repo.dirstate.p1():

397

if c2.node() is None and c1.node() == repo.dirstate.p1():

396

return repo.~~dirstate~~.~~copies~~(), {}, {}, {}, {}

398

return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

397

399

398

copytracing = repo.ui.config('experimental', 'copytrace')

400

copytracing = repo.ui.config('experimental', 'copytrace')

399

boolctrace = stringutil.parsebool(copytracing)

401

boolctrace = stringutil.parsebool(copytracing)

400

402

401

# Copy trace disabling is explicitly below the node == p1 logic above

403

# Copy trace disabling is explicitly below the node == p1 logic above

402

# because the logic above is required for a simple copy to be kept across a

404

# because the logic above is required for a simple copy to be kept across a

403

# rebase.

405

# rebase.

404

if copytracing == 'heuristics':

406

if copytracing == 'heuristics':

405

# Do full copytracing if only non-public revisions are involved as

407

# Do full copytracing if only non-public revisions are involved as

406

# that will be fast enough and will also cover the copies which could

408

# that will be fast enough and will also cover the copies which could

407

# be missed by heuristics

409

# be missed by heuristics

408

if _isfullcopytraceable(repo, c1, base):

410

if _isfullcopytraceable(repo, c1, base):

409

return _fullcopytracing(repo, c1, c2, base)

411

return _fullcopytracing(repo, c1, c2, base)

410

return _heuristicscopytracing(repo, c1, c2, base)

412

return _heuristicscopytracing(repo, c1, c2, base)

411

elif boolctrace is False:

413

elif boolctrace is False:

412

# stringutil.parsebool() returns None when it is unable to parse the

414

# stringutil.parsebool() returns None when it is unable to parse the

413

# value, so we should rely on making sure copytracing is on such cases

415

# value, so we should rely on making sure copytracing is on such cases

414

return {}, {}, {}, {}, {}

416

return {}, {}, {}, {}, {}

415

else:

417

else:

416

return _fullcopytracing(repo, c1, c2, base)

418

return _fullcopytracing(repo, c1, c2, base)

417

419

418

def _isfullcopytraceable(repo, c1, base):

420

def _isfullcopytraceable(repo, c1, base):

419

""" Checks that if base, source and destination are all no-public branches,

421

""" Checks that if base, source and destination are all no-public branches,

420

if yes let's use the full copytrace algorithm for increased capabilities

422

if yes let's use the full copytrace algorithm for increased capabilities

421

since it will be fast enough.

423

since it will be fast enough.

422

424

423

`experimental.copytrace.sourcecommitlimit` can be used to set a limit for

425

`experimental.copytrace.sourcecommitlimit` can be used to set a limit for

424

number of changesets from c1 to base such that if number of changesets are

426

number of changesets from c1 to base such that if number of changesets are

425

more than the limit, full copytracing algorithm won't be used.

427

more than the limit, full copytracing algorithm won't be used.

426

"""

428

"""

427

if c1.rev() is None:

429

if c1.rev() is None:

428

c1 = c1.p1()

430

c1 = c1.p1()

429

if c1.mutable() and base.mutable():

431

if c1.mutable() and base.mutable():

430

sourcecommitlimit = repo.ui.configint('experimental',

432

sourcecommitlimit = repo.ui.configint('experimental',

431

'copytrace.sourcecommitlimit')

433

'copytrace.sourcecommitlimit')

432

commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))

434

commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))

433

return commits < sourcecommitlimit

435

return commits < sourcecommitlimit

434

return False

436

return False

435

437

436

def _fullcopytracing(repo, c1, c2, base):

438

def _fullcopytracing(repo, c1, c2, base):

437

""" The full copytracing algorithm which finds all the new files that were

439

""" The full copytracing algorithm which finds all the new files that were

438

added from merge base up to the top commit and for each file it checks if

440

added from merge base up to the top commit and for each file it checks if

439

this file was copied from another file.

441

this file was copied from another file.

440

442

441

This is pretty slow when a lot of changesets are involved but will track all

443

This is pretty slow when a lot of changesets are involved but will track all

442

the copies.

444

the copies.

443

"""

445

"""

444

# In certain scenarios (e.g. graft, update or rebase), base can be

446

# In certain scenarios (e.g. graft, update or rebase), base can be

445

# overridden We still need to know a real common ancestor in this case We

447

# overridden We still need to know a real common ancestor in this case We

446

# can't just compute _c1.ancestor(_c2) and compare it to ca, because there

448

# can't just compute _c1.ancestor(_c2) and compare it to ca, because there

447

# can be multiple common ancestors, e.g. in case of bidmerge. Because our

449

# can be multiple common ancestors, e.g. in case of bidmerge. Because our

448

# caller may not know if the revision passed in lieu of the CA is a genuine

450

# caller may not know if the revision passed in lieu of the CA is a genuine

449

# common ancestor or not without explicitly checking it, it's better to

451

# common ancestor or not without explicitly checking it, it's better to

450

# determine that here.

452

# determine that here.

451

#

453

#

452

# base.isancestorof(wc) is False, work around that

454

# base.isancestorof(wc) is False, work around that

453

_c1 = c1.p1() if c1.rev() is None else c1

455

_c1 = c1.p1() if c1.rev() is None else c1

454

_c2 = c2.p1() if c2.rev() is None else c2

456

_c2 = c2.p1() if c2.rev() is None else c2

455

# an endpoint is "dirty" if it isn't a descendant of the merge base

457

# an endpoint is "dirty" if it isn't a descendant of the merge base

456

# if we have a dirty endpoint, we need to trigger graft logic, and also

458

# if we have a dirty endpoint, we need to trigger graft logic, and also

457

# keep track of which endpoint is dirty

459

# keep track of which endpoint is dirty

458

dirtyc1 = not base.isancestorof(_c1)

460

dirtyc1 = not base.isancestorof(_c1)

459

dirtyc2 = not base.isancestorof(_c2)

461

dirtyc2 = not base.isancestorof(_c2)

460

graft = dirtyc1 or dirtyc2

462

graft = dirtyc1 or dirtyc2

461

tca = base

463

tca = base

462

if graft:

464

if graft:

463

tca = _c1.ancestor(_c2)

465

tca = _c1.ancestor(_c2)

464

466

465

limit = _findlimit(repo, c1, c2)

467

limit = _findlimit(repo, c1, c2)

466

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

468

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

467

469

468

m1 = c1.manifest()

470

m1 = c1.manifest()

469

m2 = c2.manifest()

471

m2 = c2.manifest()

470

mb = base.manifest()

472

mb = base.manifest()

471

473

472

# gather data from _checkcopies:

474

# gather data from _checkcopies:

473

# - diverge = record all diverges in this dict

475

# - diverge = record all diverges in this dict

474

# - copy = record all non-divergent copies in this dict

476

# - copy = record all non-divergent copies in this dict

475

# - fullcopy = record all copies in this dict

477

# - fullcopy = record all copies in this dict

476

# - incomplete = record non-divergent partial copies here

478

# - incomplete = record non-divergent partial copies here

477

# - incompletediverge = record divergent partial copies here

479

# - incompletediverge = record divergent partial copies here

478

diverge = {} # divergence data is shared

480

diverge = {} # divergence data is shared

479

incompletediverge = {}

481

incompletediverge = {}

480

data1 = {'copy': {},

482

data1 = {'copy': {},

481

'fullcopy': {},

483

'fullcopy': {},

482

'incomplete': {},

484

'incomplete': {},

483

'diverge': diverge,

485

'diverge': diverge,

484

'incompletediverge': incompletediverge,

486

'incompletediverge': incompletediverge,

485

}

487

}

486

data2 = {'copy': {},

488

data2 = {'copy': {},

487

'fullcopy': {},

489

'fullcopy': {},

488

'incomplete': {},

490

'incomplete': {},

489

'diverge': diverge,

491

'diverge': diverge,

490

'incompletediverge': incompletediverge,

492

'incompletediverge': incompletediverge,

491

}

493

}

492

494

493

# find interesting file sets from manifests

495

# find interesting file sets from manifests

494

addedinm1 = m1.filesnotin(mb, repo.narrowmatch())

496

addedinm1 = m1.filesnotin(mb, repo.narrowmatch())

495

addedinm2 = m2.filesnotin(mb, repo.narrowmatch())

497

addedinm2 = m2.filesnotin(mb, repo.narrowmatch())

496

bothnew = sorted(addedinm1 & addedinm2)

498

bothnew = sorted(addedinm1 & addedinm2)

497

if tca == base:

499

if tca == base:

498

# unmatched file from base

500

# unmatched file from base

499

u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)

501

u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)

500

u1u, u2u = u1r, u2r

502

u1u, u2u = u1r, u2r

501

else:

503

else:

502

# unmatched file from base (DAG rotation in the graft case)

504

# unmatched file from base (DAG rotation in the graft case)

503

u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,

505

u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,

504

baselabel='base')

506

baselabel='base')

505

# unmatched file from topological common ancestors (no DAG rotation)

507

# unmatched file from topological common ancestors (no DAG rotation)

506

# need to recompute this for directory move handling when grafting

508

# need to recompute this for directory move handling when grafting

507

mta = tca.manifest()

509

mta = tca.manifest()

508

u1u, u2u = _computenonoverlap(repo, c1, c2,

510

u1u, u2u = _computenonoverlap(repo, c1, c2,

509

m1.filesnotin(mta, repo.narrowmatch()),

511

m1.filesnotin(mta, repo.narrowmatch()),

510

m2.filesnotin(mta, repo.narrowmatch()),

512

m2.filesnotin(mta, repo.narrowmatch()),

511

baselabel='topological common ancestor')

513

baselabel='topological common ancestor')

512

514

513

for f in u1u:

515

for f in u1u:

514

_checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

516

_checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

515

517

516

for f in u2u:

518

for f in u2u:

517

_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

519

_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

518

520

519

copy = dict(data1['copy'])

521

copy = dict(data1['copy'])

520

copy.update(data2['copy'])

522

copy.update(data2['copy'])

521

fullcopy = dict(data1['fullcopy'])

523

fullcopy = dict(data1['fullcopy'])

522

fullcopy.update(data2['fullcopy'])

524

fullcopy.update(data2['fullcopy'])

523

525

524

if dirtyc1:

526

if dirtyc1:

525

_combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,

527

_combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,

526

incompletediverge)

528

incompletediverge)

527

else:

529

else:

528

_combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,

530

_combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,

529

incompletediverge)

531

incompletediverge)

530

532

531

renamedelete = {}

533

renamedelete = {}

532

renamedeleteset = set()

534

renamedeleteset = set()

533

divergeset = set()

535

divergeset = set()

534

for of, fl in list(diverge.items()):

536

for of, fl in list(diverge.items()):

535

if len(fl) == 1 or of in c1 or of in c2:

537

if len(fl) == 1 or of in c1 or of in c2:

536

del diverge[of] # not actually divergent, or not a rename

538

del diverge[of] # not actually divergent, or not a rename

537

if of not in c1 and of not in c2:

539

if of not in c1 and of not in c2:

538

# renamed on one side, deleted on the other side, but filter

540

# renamed on one side, deleted on the other side, but filter

539

# out files that have been renamed and then deleted

541

# out files that have been renamed and then deleted

540

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

542

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

541

renamedeleteset.update(fl) # reverse map for below

543

renamedeleteset.update(fl) # reverse map for below

542

else:

544

else:

543

divergeset.update(fl) # reverse map for below

545

divergeset.update(fl) # reverse map for below

544

546

545

if bothnew:

547

if bothnew:

546

repo.ui.debug(" unmatched files new in both:\n %s\n"

548

repo.ui.debug(" unmatched files new in both:\n %s\n"

547

% "\n ".join(bothnew))

549

% "\n ".join(bothnew))

548

bothdiverge = {}

550

bothdiverge = {}

549

bothincompletediverge = {}

551

bothincompletediverge = {}

550

remainder = {}

552

remainder = {}

551

both1 = {'copy': {},

553

both1 = {'copy': {},

552

'fullcopy': {},

554

'fullcopy': {},

553

'incomplete': {},

555

'incomplete': {},

554

'diverge': bothdiverge,

556

'diverge': bothdiverge,

555

'incompletediverge': bothincompletediverge

557

'incompletediverge': bothincompletediverge

556

}

558

}

557

both2 = {'copy': {},

559

both2 = {'copy': {},

558

'fullcopy': {},

560

'fullcopy': {},

559

'incomplete': {},

561

'incomplete': {},

560

'diverge': bothdiverge,

562

'diverge': bothdiverge,

561

'incompletediverge': bothincompletediverge

563

'incompletediverge': bothincompletediverge

562

}

564

}

563

for f in bothnew:

565

for f in bothnew:

564

_checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)

566

_checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)

565

_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)

567

_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)

566

if dirtyc1:

568

if dirtyc1:

567

# incomplete copies may only be found on the "dirty" side for bothnew

569

# incomplete copies may only be found on the "dirty" side for bothnew

568

assert not both2['incomplete']

570

assert not both2['incomplete']

569

remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,

571

remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,

570

bothincompletediverge)

572

bothincompletediverge)

571

elif dirtyc2:

573

elif dirtyc2:

572

assert not both1['incomplete']

574

assert not both1['incomplete']

573

remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,

575

remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,

574

bothincompletediverge)

576

bothincompletediverge)

575

else:

577

else:

576

# incomplete copies and divergences can't happen outside grafts

578

# incomplete copies and divergences can't happen outside grafts

577

assert not both1['incomplete']

579

assert not both1['incomplete']

578

assert not both2['incomplete']

580

assert not both2['incomplete']

579

assert not bothincompletediverge

581

assert not bothincompletediverge

580

for f in remainder:

582

for f in remainder:

581

assert f not in bothdiverge

583

assert f not in bothdiverge

582

ic = remainder[f]

584

ic = remainder[f]

583

if ic[0] in (m1 if dirtyc1 else m2):

585

if ic[0] in (m1 if dirtyc1 else m2):

584

# backed-out rename on one side, but watch out for deleted files

586

# backed-out rename on one side, but watch out for deleted files

585

bothdiverge[f] = ic

587

bothdiverge[f] = ic

586

for of, fl in bothdiverge.items():

588

for of, fl in bothdiverge.items():

587

if len(fl) == 2 and fl[0] == fl[1]:

589

if len(fl) == 2 and fl[0] == fl[1]:

588

copy[fl[0]] = of # not actually divergent, just matching renames

590

copy[fl[0]] = of # not actually divergent, just matching renames

589

591

590

if fullcopy and repo.ui.debugflag:

592

if fullcopy and repo.ui.debugflag:

591

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

593

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

592

"% = renamed and deleted):\n")

594

"% = renamed and deleted):\n")

593

for f in sorted(fullcopy):

595

for f in sorted(fullcopy):

594

note = ""

596

note = ""

595

if f in copy:

597

if f in copy:

596

note += "*"

598

note += "*"

597

if f in divergeset:

599

if f in divergeset:

598

note += "!"

600

note += "!"

599

if f in renamedeleteset:

601

if f in renamedeleteset:

600

note += "%"

602

note += "%"

601

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

603

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

602

note))

604

note))

603

del divergeset

605

del divergeset

604

606

605

if not fullcopy:

607

if not fullcopy:

606

return copy, {}, diverge, renamedelete, {}

608

return copy, {}, diverge, renamedelete, {}

607

609

608

repo.ui.debug(" checking for directory renames\n")

610

repo.ui.debug(" checking for directory renames\n")

609

611

610

# generate a directory move map

612

# generate a directory move map

611

d1, d2 = c1.dirs(), c2.dirs()

613

d1, d2 = c1.dirs(), c2.dirs()

612

# Hack for adding '', which is not otherwise added, to d1 and d2

614

# Hack for adding '', which is not otherwise added, to d1 and d2

613

d1.addpath('/')

615

d1.addpath('/')

614

d2.addpath('/')

616

d2.addpath('/')

615

invalid = set()

617

invalid = set()

616

dirmove = {}

618

dirmove = {}

617

619

618

# examine each file copy for a potential directory move, which is

620

# examine each file copy for a potential directory move, which is

619

# when all the files in a directory are moved to a new directory

621

# when all the files in a directory are moved to a new directory

620

for dst, src in fullcopy.iteritems():

622

for dst, src in fullcopy.iteritems():

621

dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)

623

dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)

622

if dsrc in invalid:

624

if dsrc in invalid:

623

# already seen to be uninteresting

625

# already seen to be uninteresting

624

continue

626

continue

625

elif dsrc in d1 and ddst in d1:

627

elif dsrc in d1 and ddst in d1:

626

# directory wasn't entirely moved locally

628

# directory wasn't entirely moved locally

627

invalid.add(dsrc)

629

invalid.add(dsrc)

628

elif dsrc in d2 and ddst in d2:

630

elif dsrc in d2 and ddst in d2:

629

# directory wasn't entirely moved remotely

631

# directory wasn't entirely moved remotely

630

invalid.add(dsrc)

632

invalid.add(dsrc)

631

elif dsrc in dirmove and dirmove[dsrc] != ddst:

633

elif dsrc in dirmove and dirmove[dsrc] != ddst:

632

# files from the same directory moved to two different places

634

# files from the same directory moved to two different places

633

invalid.add(dsrc)

635

invalid.add(dsrc)

634

else:

636

else:

635

# looks good so far

637

# looks good so far

636

dirmove[dsrc] = ddst

638

dirmove[dsrc] = ddst

637

639

638

for i in invalid:

640

for i in invalid:

639

if i in dirmove:

641

if i in dirmove:

640

del dirmove[i]

642

del dirmove[i]

641

del d1, d2, invalid

643

del d1, d2, invalid

642

644

643

if not dirmove:

645

if not dirmove:

644

return copy, {}, diverge, renamedelete, {}

646

return copy, {}, diverge, renamedelete, {}

645

647

646

dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

648

dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

647

649

648

for d in dirmove:

650

for d in dirmove:

649

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

651

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

650

(d, dirmove[d]))

652

(d, dirmove[d]))

651

653

652

movewithdir = {}

654

movewithdir = {}

653

# check unaccounted nonoverlapping files against directory moves

655

# check unaccounted nonoverlapping files against directory moves

654

for f in u1r + u2r:

656

for f in u1r + u2r:

655

if f not in fullcopy:

657

if f not in fullcopy:

656

for d in dirmove:

658

for d in dirmove:

657

if f.startswith(d):

659

if f.startswith(d):

658

# new file added in a directory that was moved, move it

660

# new file added in a directory that was moved, move it

659

df = dirmove[d] + f[len(d):]

661

df = dirmove[d] + f[len(d):]

660

if df not in copy:

662

if df not in copy:

661

movewithdir[f] = df

663

movewithdir[f] = df

662

repo.ui.debug((" pending file src: '%s' -> "

664

repo.ui.debug((" pending file src: '%s' -> "

663

"dst: '%s'\n") % (f, df))

665

"dst: '%s'\n") % (f, df))

664

break

666

break

665

667

666

return copy, movewithdir, diverge, renamedelete, dirmove

668

return copy, movewithdir, diverge, renamedelete, dirmove

667

669

668

def _heuristicscopytracing(repo, c1, c2, base):

670

def _heuristicscopytracing(repo, c1, c2, base):

669

""" Fast copytracing using filename heuristics

671

""" Fast copytracing using filename heuristics

670

672

671

Assumes that moves or renames are of following two types:

673

Assumes that moves or renames are of following two types:

672

674

673

1) Inside a directory only (same directory name but different filenames)

675

1) Inside a directory only (same directory name but different filenames)

674

2) Move from one directory to another

676

2) Move from one directory to another

675

(same filenames but different directory names)

677

(same filenames but different directory names)

676

678

677

Works only when there are no merge commits in the "source branch".

679

Works only when there are no merge commits in the "source branch".

678

Source branch is commits from base up to c2 not including base.

680

Source branch is commits from base up to c2 not including base.

679

681

680

If merge is involved it fallbacks to _fullcopytracing().

682

If merge is involved it fallbacks to _fullcopytracing().

681

683

682

Can be used by setting the following config:

684

Can be used by setting the following config:

683

685

684

[experimental]

686

[experimental]

685

copytrace = heuristics

687

copytrace = heuristics

686

688

687

In some cases the copy/move candidates found by heuristics can be very large

689

In some cases the copy/move candidates found by heuristics can be very large

688

in number and that will make the algorithm slow. The number of possible

690

in number and that will make the algorithm slow. The number of possible

689

candidates to check can be limited by using the config

691

candidates to check can be limited by using the config

690

`experimental.copytrace.movecandidateslimit` which defaults to 100.

692

`experimental.copytrace.movecandidateslimit` which defaults to 100.

691

"""

693

"""

692

694

693

if c1.rev() is None:

695

if c1.rev() is None:

694

c1 = c1.p1()

696

c1 = c1.p1()

695

if c2.rev() is None:

697

if c2.rev() is None:

696

c2 = c2.p1()

698

c2 = c2.p1()

697

699

698

copies = {}

700

copies = {}

699

701

700

changedfiles = set()

702

changedfiles = set()

701

m1 = c1.manifest()

703

m1 = c1.manifest()

702

if not repo.revs('%d::%d', base.rev(), c2.rev()):

704

if not repo.revs('%d::%d', base.rev(), c2.rev()):

703

# If base is not in c2 branch, we switch to fullcopytracing

705

# If base is not in c2 branch, we switch to fullcopytracing

704

repo.ui.debug("switching to full copytracing as base is not "

706

repo.ui.debug("switching to full copytracing as base is not "

705

"an ancestor of c2\n")

707

"an ancestor of c2\n")

706

return _fullcopytracing(repo, c1, c2, base)

708

return _fullcopytracing(repo, c1, c2, base)

707

709

708

ctx = c2

710

ctx = c2

709

while ctx != base:

711

while ctx != base:

710

if len(ctx.parents()) == 2:

712

if len(ctx.parents()) == 2:

711

# To keep things simple let's not handle merges

713

# To keep things simple let's not handle merges

712

repo.ui.debug("switching to full copytracing because of merges\n")

714

repo.ui.debug("switching to full copytracing because of merges\n")

713

return _fullcopytracing(repo, c1, c2, base)

715

return _fullcopytracing(repo, c1, c2, base)

714

changedfiles.update(ctx.files())

716

changedfiles.update(ctx.files())

715

ctx = ctx.p1()

717

ctx = ctx.p1()

716

718

717

cp = _forwardcopies(base, c2)

719

cp = _forwardcopies(base, c2)

718

for dst, src in cp.iteritems():

720

for dst, src in cp.iteritems():

719

if src in m1:

721

if src in m1:

720

copies[dst] = src

722

copies[dst] = src

721

723

722

# file is missing if it isn't present in the destination, but is present in

724

# file is missing if it isn't present in the destination, but is present in

723

# the base and present in the source.

725

# the base and present in the source.

724

# Presence in the base is important to exclude added files, presence in the

726

# Presence in the base is important to exclude added files, presence in the

725

# source is important to exclude removed files.

727

# source is important to exclude removed files.

726

filt = lambda f: f not in m1 and f in base and f in c2

728

filt = lambda f: f not in m1 and f in base and f in c2

727

missingfiles = [f for f in changedfiles if filt(f)]

729

missingfiles = [f for f in changedfiles if filt(f)]

728

730

729

if missingfiles:

731

if missingfiles:

730

basenametofilename = collections.defaultdict(list)

732

basenametofilename = collections.defaultdict(list)

731

dirnametofilename = collections.defaultdict(list)

733

dirnametofilename = collections.defaultdict(list)

732

734

733

for f in m1.filesnotin(base.manifest()):

735

for f in m1.filesnotin(base.manifest()):

734

basename = os.path.basename(f)

736

basename = os.path.basename(f)

735

dirname = os.path.dirname(f)

737

dirname = os.path.dirname(f)

736

basenametofilename[basename].append(f)

738

basenametofilename[basename].append(f)

737

dirnametofilename[dirname].append(f)

739

dirnametofilename[dirname].append(f)

738

740

739

for f in missingfiles:

741

for f in missingfiles:

740

basename = os.path.basename(f)

742

basename = os.path.basename(f)

741

dirname = os.path.dirname(f)

743

dirname = os.path.dirname(f)

742

samebasename = basenametofilename[basename]

744

samebasename = basenametofilename[basename]

743

samedirname = dirnametofilename[dirname]

745

samedirname = dirnametofilename[dirname]

744

movecandidates = samebasename + samedirname

746

movecandidates = samebasename + samedirname

745

# f is guaranteed to be present in c2, that's why

747

# f is guaranteed to be present in c2, that's why

746

# c2.filectx(f) won't fail

748

# c2.filectx(f) won't fail

747

f2 = c2.filectx(f)

749

f2 = c2.filectx(f)

748

# we can have a lot of candidates which can slow down the heuristics

750

# we can have a lot of candidates which can slow down the heuristics

749

# config value to limit the number of candidates moves to check

751

# config value to limit the number of candidates moves to check

750

maxcandidates = repo.ui.configint('experimental',

752

maxcandidates = repo.ui.configint('experimental',

751

'copytrace.movecandidateslimit')

753

'copytrace.movecandidateslimit')

752

754

753

if len(movecandidates) > maxcandidates:

755

if len(movecandidates) > maxcandidates:

754

repo.ui.status(_("skipping copytracing for '%s', more "

756

repo.ui.status(_("skipping copytracing for '%s', more "

755

"candidates than the limit: %d\n")

757

"candidates than the limit: %d\n")

756

% (f, len(movecandidates)))

758

% (f, len(movecandidates)))

757

continue

759

continue

758

760

759

for candidate in movecandidates:

761

for candidate in movecandidates:

760

f1 = c1.filectx(candidate)

762

f1 = c1.filectx(candidate)

761

if _related(f1, f2):

763

if _related(f1, f2):

762

# if there are a few related copies then we'll merge

764

# if there are a few related copies then we'll merge

763

# changes into all of them. This matches the behaviour

765

# changes into all of them. This matches the behaviour

764

# of upstream copytracing

766

# of upstream copytracing

765

copies[candidate] = f

767

copies[candidate] = f

766

768

767

return copies, {}, {}, {}, {}

769

return copies, {}, {}, {}, {}

768

770

769

def _related(f1, f2):

771

def _related(f1, f2):

770

"""return True if f1 and f2 filectx have a common ancestor

772

"""return True if f1 and f2 filectx have a common ancestor

771

773

772

Walk back to common ancestor to see if the two files originate

774

Walk back to common ancestor to see if the two files originate

773

from the same file. Since workingfilectx's rev() is None it messes

775

from the same file. Since workingfilectx's rev() is None it messes

774

up the integer comparison logic, hence the pre-step check for

776

up the integer comparison logic, hence the pre-step check for

775

None (f1 and f2 can only be workingfilectx's initially).

777

None (f1 and f2 can only be workingfilectx's initially).

776

"""

778

"""

777

779

778

if f1 == f2:

780

if f1 == f2:

779

return True # a match

781

return True # a match

780

782

781

g1, g2 = f1.ancestors(), f2.ancestors()

783

g1, g2 = f1.ancestors(), f2.ancestors()

782

try:

784

try:

783

f1r, f2r = f1.linkrev(), f2.linkrev()

785

f1r, f2r = f1.linkrev(), f2.linkrev()

784

786

785

if f1r is None:

787

if f1r is None:

786

f1 = next(g1)

788

f1 = next(g1)

787

if f2r is None:

789

if f2r is None:

788

f2 = next(g2)

790

f2 = next(g2)

789

791

790

while True:

792

while True:

791

f1r, f2r = f1.linkrev(), f2.linkrev()

793

f1r, f2r = f1.linkrev(), f2.linkrev()

792

if f1r > f2r:

794

if f1r > f2r:

793

f1 = next(g1)

795

f1 = next(g1)

794

elif f2r > f1r:

796

elif f2r > f1r:

795

f2 = next(g2)

797

f2 = next(g2)

796

else: # f1 and f2 point to files in the same linkrev

798

else: # f1 and f2 point to files in the same linkrev

797

return f1 == f2 # true if they point to the same file

799

return f1 == f2 # true if they point to the same file

798

except StopIteration:

800

except StopIteration:

799

return False

801

return False

800

802

801

def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):

803

def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):

802

"""

804

"""

803

check possible copies of f from msrc to mdst

805

check possible copies of f from msrc to mdst

804

806

805

srcctx = starting context for f in msrc

807

srcctx = starting context for f in msrc

806

dstctx = destination context for f in mdst

808

dstctx = destination context for f in mdst

807

f = the filename to check (as in msrc)

809

f = the filename to check (as in msrc)

808

base = the changectx used as a merge base

810

base = the changectx used as a merge base

809

tca = topological common ancestor for graft-like scenarios

811

tca = topological common ancestor for graft-like scenarios

810

remotebase = True if base is outside tca::srcctx, False otherwise

812

remotebase = True if base is outside tca::srcctx, False otherwise

811

limit = the rev number to not search beyond

813

limit = the rev number to not search beyond

812

data = dictionary of dictionary to store copy data. (see mergecopies)

814

data = dictionary of dictionary to store copy data. (see mergecopies)

813

815

814

note: limit is only an optimization, and provides no guarantee that

816

note: limit is only an optimization, and provides no guarantee that

815

irrelevant revisions will not be visited

817

irrelevant revisions will not be visited

816

there is no easy way to make this algorithm stop in a guaranteed way

818

there is no easy way to make this algorithm stop in a guaranteed way

817

once it "goes behind a certain revision".

819

once it "goes behind a certain revision".

818

"""

820

"""

819

821

820

msrc = srcctx.manifest()

822

msrc = srcctx.manifest()

821

mdst = dstctx.manifest()

823

mdst = dstctx.manifest()

822

mb = base.manifest()

824

mb = base.manifest()

823

mta = tca.manifest()

825

mta = tca.manifest()

824

# Might be true if this call is about finding backward renames,

826

# Might be true if this call is about finding backward renames,

825

# This happens in the case of grafts because the DAG is then rotated.

827

# This happens in the case of grafts because the DAG is then rotated.

826

# If the file exists in both the base and the source, we are not looking

828

# If the file exists in both the base and the source, we are not looking

827

# for a rename on the source side, but on the part of the DAG that is

829

# for a rename on the source side, but on the part of the DAG that is

828

# traversed backwards.

830

# traversed backwards.

829

#

831

#

830

# In the case there is both backward and forward renames (before and after

832

# In the case there is both backward and forward renames (before and after

831

# the base) this is more complicated as we must detect a divergence.

833

# the base) this is more complicated as we must detect a divergence.

832

# We use 'backwards = False' in that case.

834

# We use 'backwards = False' in that case.

833

backwards = not remotebase and base != tca and f in mb

835

backwards = not remotebase and base != tca and f in mb

834

getsrcfctx = _makegetfctx(srcctx)

836

getsrcfctx = _makegetfctx(srcctx)

835

getdstfctx = _makegetfctx(dstctx)

837

getdstfctx = _makegetfctx(dstctx)

836

838

837

if msrc[f] == mb.get(f) and not remotebase:

839

if msrc[f] == mb.get(f) and not remotebase:

838

# Nothing to merge

840

# Nothing to merge

839

return

841

return

840

842

841

of = None

843

of = None

842

seen = {f}

844

seen = {f}

843

for oc in getsrcfctx(f, msrc[f]).ancestors():

845

for oc in getsrcfctx(f, msrc[f]).ancestors():

844

of = oc.path()

846

of = oc.path()

845

if of in seen:

847

if of in seen:

846

# check limit late - grab last rename before

848

# check limit late - grab last rename before

847

if oc.linkrev() < limit:

849

if oc.linkrev() < limit:

848

break

850

break

849

continue

851

continue

850

seen.add(of)

852

seen.add(of)

851

853

852

# remember for dir rename detection

854

# remember for dir rename detection

853

if backwards:

855

if backwards:

854

data['fullcopy'][of] = f # grafting backwards through renames

856

data['fullcopy'][of] = f # grafting backwards through renames

855

else:

857

else:

856

data['fullcopy'][f] = of

858

data['fullcopy'][f] = of

857

if of not in mdst:

859

if of not in mdst:

858

continue # no match, keep looking

860

continue # no match, keep looking

859

if mdst[of] == mb.get(of):

861

if mdst[of] == mb.get(of):

860

return # no merge needed, quit early

862

return # no merge needed, quit early

861

c2 = getdstfctx(of, mdst[of])

863

c2 = getdstfctx(of, mdst[of])

862

# c2 might be a plain new file on added on destination side that is

864

# c2 might be a plain new file on added on destination side that is

863

# unrelated to the droids we are looking for.

865

# unrelated to the droids we are looking for.

864

cr = _related(oc, c2)

866

cr = _related(oc, c2)

865

if cr and (of == f or of == c2.path()): # non-divergent

867

if cr and (of == f or of == c2.path()): # non-divergent

866

if backwards:

868

if backwards:

867

data['copy'][of] = f

869

data['copy'][of] = f

868

elif of in mb:

870

elif of in mb:

869

data['copy'][f] = of

871

data['copy'][f] = of

870

elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename

872

elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename

871

data['copy'][of] = f

873

data['copy'][of] = f

872

del data['fullcopy'][f]

874

del data['fullcopy'][f]

873

data['fullcopy'][of] = f

875

data['fullcopy'][of] = f

874

else: # divergence w.r.t. graft CA on one side of topological CA

876

else: # divergence w.r.t. graft CA on one side of topological CA

875

for sf in seen:

877

for sf in seen:

876

if sf in mb:

878

if sf in mb:

877

assert sf not in data['diverge']

879

assert sf not in data['diverge']

878

data['diverge'][sf] = [f, of]

880

data['diverge'][sf] = [f, of]

879

break

881

break

880

return

882

return

881

883

882

if of in mta:

884

if of in mta:

883

if backwards or remotebase:

885

if backwards or remotebase:

884

data['incomplete'][of] = f

886

data['incomplete'][of] = f

885

else:

887

else:

886

for sf in seen:

888

for sf in seen:

887

if sf in mb:

889

if sf in mb:

888

if tca == base:

890

if tca == base:

889

data['diverge'].setdefault(sf, []).append(f)

891

data['diverge'].setdefault(sf, []).append(f)

890

else:

892

else:

891

data['incompletediverge'][sf] = [of, f]

893

data['incompletediverge'][sf] = [of, f]

892

return

894

return

893

895

894

def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):

896

def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):

895

"""reproduce copies from fromrev to rev in the dirstate

897

"""reproduce copies from fromrev to rev in the dirstate

896

898

897

If skiprev is specified, it's a revision that should be used to

899

If skiprev is specified, it's a revision that should be used to

898

filter copy records. Any copies that occur between fromrev and

900

filter copy records. Any copies that occur between fromrev and

899

skiprev will not be duplicated, even if they appear in the set of

901

skiprev will not be duplicated, even if they appear in the set of

900

copies between fromrev and rev.

902

copies between fromrev and rev.

901

"""

903

"""

902

exclude = {}

904

exclude = {}

903

ctraceconfig = repo.ui.config('experimental', 'copytrace')

905

ctraceconfig = repo.ui.config('experimental', 'copytrace')

904

bctrace = stringutil.parsebool(ctraceconfig)

906

bctrace = stringutil.parsebool(ctraceconfig)

905

if (skiprev is not None and

907

if (skiprev is not None and

906

(ctraceconfig == 'heuristics' or bctrace or bctrace is None)):

908

(ctraceconfig == 'heuristics' or bctrace or bctrace is None)):

907

# copytrace='off' skips this line, but not the entire function because

909

# copytrace='off' skips this line, but not the entire function because

908

# the line below is O(size of the repo) during a rebase, while the rest

910

# the line below is O(size of the repo) during a rebase, while the rest

909

# of the function is much faster (and is required for carrying copy

911

# of the function is much faster (and is required for carrying copy

910

# metadata across the rebase anyway).

912

# metadata across the rebase anyway).

911

exclude = pathcopies(repo[fromrev], repo[skiprev])

913

exclude = pathcopies(repo[fromrev], repo[skiprev])

912

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

914

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

913

# copies.pathcopies returns backward renames, so dst might not

915

# copies.pathcopies returns backward renames, so dst might not

914

# actually be in the dirstate

916

# actually be in the dirstate

915

if dst in exclude:

917

if dst in exclude:

916

continue

918

continue

917

wctx[dst].markcopied(src)

919

wctx[dst].markcopied(src)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import collections
             import heapq
             import os
             from .i18n import _
             from . import (
                 match as matchmod,
                 node,
                 pathutil,
                 scmutil,
                 util,
             )
             from .utils import (
                 stringutil,
             )
             def _findlimit(repo, ctxa, ctxb):
                 """
                 Find the last revision that needs to be checked to ensure that a full
                 transitive closure for file copies can be properly calculated.
                 Generally, this means finding the earliest revision number that's an
                 ancestor of a or b but not both, except when a or b is a direct descendent
                 of the other, in which case we can return the minimum revnum of a and b.
                 """
                 # basic idea:
                 # - mark a and b with different sides
                 # - if a parent's children are all on the same side, the parent is
                 #   on that side, otherwise it is on no side
                 # - walk the graph in topological order with the help of a heap;
                 #   - add unseen parents to side map
                 #   - clear side of any parent that has children on different sides
                 #   - track number of interesting revs that might still be on a side
                 #   - track the lowest interesting rev seen
                 #   - quit when interesting revs is zero
                 cl = repo.changelog
                 wdirparents = None
                 a = ctxa.rev()
                 b = ctxb.rev()
                 if a is None:
                     wdirparents = (ctxa.p1(), ctxa.p2())
                     a = node.wdirrev
                 if b is None:
                     assert not wdirparents
                     wdirparents = (ctxb.p1(), ctxb.p2())
                     b = node.wdirrev
                 side = {a: -1, b: 1}
                 visit = [-a, -b]
                 heapq.heapify(visit)
                 interesting = len(visit)
                 limit = node.wdirrev
                 while interesting:
                     r = -heapq.heappop(visit)
                     if r == node.wdirrev:
                         parents = [pctx.rev() for pctx in wdirparents]
                     else:
                         parents = cl.parentrevs(r)
                     if parents[1] == node.nullrev:
                         parents = parents[:1]
                     for p in parents:
                         if p not in side:
                             # first time we see p; add it to visit
                             side[p] = side[r]
                             if side[p]:
                                 interesting += 1
                             heapq.heappush(visit, -p)
                         elif side[p] and side[p] != side[r]:
                             # p was interesting but now we know better
                             side[p] = 0
                             interesting -= 1
                     if side[r]:
                         limit = r # lowest rev visited
                         interesting -= 1
                 # Consider the following flow (see test-commit-amend.t under issue4405):
                 # 1/ File 'a0' committed
                 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
                 # 3/ Move back to first commit
                 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
                 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
                 #
                 # During the amend in step five, we will be in this state:
                 #
                 # @  3 temporary amend commit for a1-amend
                 # |
                 # o  2 a1-amend
                 # |
                 # | o  1 a1
                 # |/
                 # o  0 a0
                 #
                 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
                 # yet the filelog has the copy information in rev 1 and we will not look
                 # back far enough unless we also look at the a and b as candidates.
                 # This only occurs when a is a descendent of b or visa-versa.
                 return min(limit, a, b)
             def _chain(src, dst, a, b):
                 """chain two sets of copies a->b"""
                 t = a.copy()
                 for k, v in b.iteritems():
                     if v in t:
                         # found a chain
                         if t[v] != k:
                             # file wasn't renamed back to itself
                             t[k] = t[v]
                         if v not in dst:
                             # chain was a rename, not a copy
                             del t[v]
                     if v in src:
                         # file is a copy of an existing file
                         t[k] = v
                 # remove criss-crossed copies
                 for k, v in list(t.items()):
                     if k in src and v in dst:
                         del t[k]
                 return t
             def _tracefile(fctx, am, limit=node.nullrev):
                 """return file context that is the ancestor of fctx present in ancestor
                 manifest am, stopping after the first ancestor lower than limit"""
                 for f in fctx.ancestors():
                     if am.get(f.path(), None) == f.filenode():
                         return f
                     if limit >= 0 and not f.isintroducedafter(limit):
                         return None
-            def _dirstatecopies(d, match=None):
+            def _dirstatecopies(repo, match=None):
-                ds = d._repo.dirstate
+                ds = repo.dirstate
                 c = ds.copies().copy()
                 for k in list(c):
                     if ds[k] not in 'anm' or (match and not match(k)):
                         del c[k]
                 return c
             def _computeforwardmissing(a, b, match=None):
                 """Computes which files are in b but not a.
                 This is its own function so extensions can easily wrap this call to see what
                 files _forwardcopies is about to process.
                 """
                 ma = a.manifest()
                 mb = b.manifest()
                 return mb.filesnotin(ma, match=match)
             def _committedforwardcopies(a, b, match):
                 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 repo = a._repo
                 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
                 dbg = repo.ui.debug
                 if debug:
                     dbg('debug.copies:    looking into rename from %s to %s\n'
                         % (a, b))
                 limit = _findlimit(repo, a, b)
                 if debug:
                     dbg('debug.copies:      search limit: %d\n' % limit)
                 am = a.manifest()
                 # find where new files came from
                 # we currently don't try to find where old files went, too expensive
                 # this means we can miss a case like 'hg rm b; hg cp a b'
                 cm = {}
                 # Computing the forward missing is quite expensive on large manifests, since
                 # it compares the entire manifests. We can optimize it in the common use
                 # case of computing what copies are in a commit versus its parent (like
                 # during a rebase or histedit). Note, we exclude merge commits from this
                 # optimization, since the ctx.files() for a merge commit is not correct for
                 # this comparison.
                 forwardmissingmatch = match
                 if b.p1() == a and b.p2().node() == node.nullid:
                     filesmatcher = scmutil.matchfiles(a._repo, b.files())
                     forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
                 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
                 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
                 if debug:
                     dbg('debug.copies:      missing file to search: %d\n' % len(missing))
                 for f in missing:
                     if debug:
                         dbg('debug.copies:        tracing file: %s\n' % f)
                     fctx = b[f]
                     fctx._ancestrycontext = ancestrycontext
                     if debug:
                         start = util.timer()
                     ofctx = _tracefile(fctx, am, limit)
                     if ofctx:
                         if debug:
                             dbg('debug.copies:          rename of: %s\n' % ofctx._path)
                         cm[f] = ofctx.path()
                     if debug:
                         dbg('debug.copies:          time: %f seconds\n'
                             % (util.timer() - start))
                 return cm
             def _forwardcopies(a, b, match=None):
                 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
                 match = a.repo().narrowmatch(match)
                 # check for working copy
                 if b.rev() is None:
                     if a == b.p1():
                         # short-circuit to avoid issues with merge states
-                        return _dirstatecopies(b, match)
+                        return _dirstatecopies(b._repo, match)
                     cm = _committedforwardcopies(a, b.p1(), match)
                     # combine copies from dirstate if necessary
-                    return _chain(a, b, cm, _dirstatecopies(b, match))
+                    return _chain(a, b, cm, _dirstatecopies(b._repo, match))
                 return _committedforwardcopies(a, b, match)
             def _backwardrenames(a, b):
                 if a._repo.ui.config('experimental', 'copytrace') == 'off':
                     return {}
                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
                 f = _forwardcopies(b, a)
                 r = {}
                 for k, v in sorted(f.iteritems()):
                     # remove copies
                     if v in a:
                         continue
                     r[v] = k
                 return r
             def pathcopies(x, y, match=None):
                 """find {dst@y: src@x} copy mapping for directed compare"""
                 repo = x._repo
                 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
                 if debug:
                     repo.ui.debug('debug.copies: searching copies from %s to %s\n'
                                   % (x, y))
                 if x == y or not x or not y:
                     return {}
                 a = y.ancestor(x)
                 if a == x:
                     if debug:
                         repo.ui.debug('debug.copies: search mode: forward\n')
                     return _forwardcopies(x, y, match=match)
                 if a == y:
                     if debug:
                         repo.ui.debug('debug.copies: search mode: backward\n')
                     return _backwardrenames(x, y)
                 if debug:
                     repo.ui.debug('debug.copies: search mode: combined\n')
                 return _chain(x, y, _backwardrenames(x, a),
                               _forwardcopies(a, y, match=match))
             def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
                 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
                 and c2. This is its own function so extensions can easily wrap this call
                 to see what files mergecopies is about to process.
                 Even though c1 and c2 are not used in this function, they are useful in
                 other extensions for being able to read the file nodes of the changed files.
                 "baselabel" can be passed to help distinguish the multiple computations
                 done in the graft case.
                 """
                 u1 = sorted(addedinm1 - addedinm2)
                 u2 = sorted(addedinm2 - addedinm1)
                 header = "  unmatched files in %s"
                 if baselabel:
                     header += ' (from %s)' % baselabel
                 if u1:
                     repo.ui.debug("%s:\n   %s\n" % (header % 'local', "\n   ".join(u1)))
                 if u2:
                     repo.ui.debug("%s:\n   %s\n" % (header % 'other', "\n   ".join(u2)))
                 return u1, u2
             def _makegetfctx(ctx):
                 """return a 'getfctx' function suitable for _checkcopies usage
                 We have to re-setup the function building 'filectx' for each
                 '_checkcopies' to ensure the linkrev adjustment is properly setup for
                 each. Linkrev adjustment is important to avoid bug in rename
                 detection. Moreover, having a proper '_ancestrycontext' setup ensures
                 the performance impact of this adjustment is kept limited. Without it,
                 each file could do a full dag traversal making the time complexity of
                 the operation explode (see issue4537).
                 This function exists here mostly to limit the impact on stable. Feel
                 free to refactor on default.
                 """
                 rev = ctx.rev()
                 repo = ctx._repo
                 ac = getattr(ctx, '_ancestrycontext', None)
                 if ac is None:
                     revs = [rev]
                     if rev is None:
                         revs = [p.rev() for p in ctx.parents()]
                     ac = repo.changelog.ancestors(revs, inclusive=True)
                     ctx._ancestrycontext = ac
                 def makectx(f, n):
                     if n in node.wdirfilenodeids:  # in a working context?
                         if ctx.rev() is None:
                             return ctx.filectx(f)
                         return repo[None][f]
                     fctx = repo.filectx(f, fileid=n)
                     # setup only needed for filectx not create from a changectx
                     fctx._ancestrycontext = ac
                     fctx._descendantrev = rev
                     return fctx
                 return util.lrucachefunc(makectx)
             def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
                 """combine partial copy paths"""
                 remainder = {}
                 for f in copyfrom:
                     if f in copyto:
                         finalcopy[copyto[f]] = copyfrom[f]
                         del copyto[f]
                 for f in incompletediverge:
                     assert f not in diverge
                     ic = incompletediverge[f]
                     if ic[0] in copyto:
                         diverge[f] = [copyto[ic[0]], ic[1]]
                     else:
                         remainder[f] = ic
                 return remainder
             def mergecopies(repo, c1, c2, base):
                 """
                 The function calling different copytracing algorithms on the basis of config
                 which find moves and copies between context c1 and c2 that are relevant for
                 merging. 'base' will be used as the merge base.
                 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
                 files that were moved/ copied in one merge parent and modified in another.
                 For example:
                 o          ---> 4 another commit
                 |
                 |   o      ---> 3 commit that modifies a.txt
                 |  /
                 o /        ---> 2 commit that moves a.txt to b.txt
                 |/
                 o          ---> 1 merge base
                 If we try to rebase revision 3 on revision 4, since there is no a.txt in
                 revision 4, and if user have copytrace disabled, we prints the following
                 message:
                 ```other changed <file> which local deleted```
                 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
                 "dirmove".
                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.
                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.
                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.
                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.
                 "dirmove" is a mapping of detected source dir -> destination dir renames.
                 This is needed for handling changes to new files previously grafted into
                 renamed directories.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return {}, {}, {}, {}, {}
+                narrowmatch = c1.repo().narrowmatch()
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
-                    return repo.dirstate.copies(), {}, {}, {}, {}
+                    return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
                 copytracing = repo.ui.config('experimental', 'copytrace')
                 boolctrace = stringutil.parsebool(copytracing)
                 # Copy trace disabling is explicitly below the node == p1 logic above
                 # because the logic above is required for a simple copy to be kept across a
                 # rebase.
                 if copytracing == 'heuristics':
                     # Do full copytracing if only non-public revisions are involved as
                     # that will be fast enough and will also cover the copies which could
                     # be missed by heuristics
                     if _isfullcopytraceable(repo, c1, base):
                         return _fullcopytracing(repo, c1, c2, base)
                     return _heuristicscopytracing(repo, c1, c2, base)
                 elif boolctrace is False:
                     # stringutil.parsebool() returns None when it is unable to parse the
                     # value, so we should rely on making sure copytracing is on such cases
                     return {}, {}, {}, {}, {}
                 else:
                     return _fullcopytracing(repo, c1, c2, base)
             def _isfullcopytraceable(repo, c1, base):
                 """ Checks that if base, source and destination are all no-public branches,
                 if yes let's use the full copytrace algorithm for increased capabilities
                 since it will be fast enough.
                 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
                 number of changesets from c1 to base such that if number of changesets are
                 more than the limit, full copytracing algorithm won't be used.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c1.mutable() and base.mutable():
                     sourcecommitlimit = repo.ui.configint('experimental',
                                                           'copytrace.sourcecommitlimit')
                     commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
                     return commits < sourcecommitlimit
                 return False
             def _fullcopytracing(repo, c1, c2, base):
                 """ The full copytracing algorithm which finds all the new files that were
                 added from merge base up to the top commit and for each file it checks if
                 this file was copied from another file.
                 This is pretty slow when a lot of changesets are involved but will track all
                 the copies.
                 """
                 # In certain scenarios (e.g. graft, update or rebase), base can be
                 # overridden We still need to know a real common ancestor in this case We
                 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
                 # can be multiple common ancestors, e.g. in case of bidmerge.  Because our
                 # caller may not know if the revision passed in lieu of the CA is a genuine
                 # common ancestor or not without explicitly checking it, it's better to
                 # determine that here.
                 #
                 # base.isancestorof(wc) is False, work around that
                 _c1 = c1.p1() if c1.rev() is None else c1
                 _c2 = c2.p1() if c2.rev() is None else c2
                 # an endpoint is "dirty" if it isn't a descendant of the merge base
                 # if we have a dirty endpoint, we need to trigger graft logic, and also
                 # keep track of which endpoint is dirty
                 dirtyc1 = not base.isancestorof(_c1)
                 dirtyc2 = not base.isancestorof(_c2)
                 graft = dirtyc1 or dirtyc2
                 tca = base
                 if graft:
                     tca = _c1.ancestor(_c2)
                 limit = _findlimit(repo, c1, c2)
                 repo.ui.debug("  searching for copies back to rev %d\n" % limit)
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 mb = base.manifest()
                 # gather data from _checkcopies:
                 # - diverge = record all diverges in this dict
                 # - copy = record all non-divergent copies in this dict
                 # - fullcopy = record all copies in this dict
                 # - incomplete = record non-divergent partial copies here
                 # - incompletediverge = record divergent partial copies here
                 diverge = {} # divergence data is shared
                 incompletediverge  = {}
                 data1 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': diverge,
                          'incompletediverge': incompletediverge,
                         }
                 data2 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': diverge,
                          'incompletediverge': incompletediverge,
                         }
                 # find interesting file sets from manifests
                 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
                 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
                 bothnew = sorted(addedinm1 & addedinm2)
                 if tca == base:
                     # unmatched file from base
                     u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
                     u1u, u2u = u1r, u2r
                 else:
                     # unmatched file from base (DAG rotation in the graft case)
                     u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                                   baselabel='base')
                     # unmatched file from topological common ancestors (no DAG rotation)
                     # need to recompute this for directory move handling when grafting
                     mta = tca.manifest()
                     u1u, u2u = _computenonoverlap(repo, c1, c2,
                                                   m1.filesnotin(mta, repo.narrowmatch()),
                                                   m2.filesnotin(mta, repo.narrowmatch()),
                                                   baselabel='topological common ancestor')
                 for f in u1u:
                     _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
                 for f in u2u:
                     _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
                 copy = dict(data1['copy'])
                 copy.update(data2['copy'])
                 fullcopy = dict(data1['fullcopy'])
                 fullcopy.update(data2['fullcopy'])
                 if dirtyc1:
                     _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                                    incompletediverge)
                 else:
                     _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                                    incompletediverge)
                 renamedelete = {}
                 renamedeleteset = set()
                 divergeset = set()
                 for of, fl in list(diverge.items()):
                     if len(fl) == 1 or of in c1 or of in c2:
                         del diverge[of] # not actually divergent, or not a rename
                         if of not in c1 and of not in c2:
                             # renamed on one side, deleted on the other side, but filter
                             # out files that have been renamed and then deleted
                             renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                             renamedeleteset.update(fl) # reverse map for below
                     else:
                         divergeset.update(fl) # reverse map for below
                 if bothnew:
                     repo.ui.debug("  unmatched files new in both:\n   %s\n"
                                   % "\n   ".join(bothnew))
                 bothdiverge = {}
                 bothincompletediverge = {}
                 remainder = {}
                 both1 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': bothdiverge,
                          'incompletediverge': bothincompletediverge
                         }
                 both2 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': bothdiverge,
                          'incompletediverge': bothincompletediverge
                         }
                 for f in bothnew:
                     _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
                     _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
                 if dirtyc1:
                     # incomplete copies may only be found on the "dirty" side for bothnew
                     assert not both2['incomplete']
                     remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                                bothincompletediverge)
                 elif dirtyc2:
                     assert not both1['incomplete']
                     remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                                bothincompletediverge)
                 else:
                     # incomplete copies and divergences can't happen outside grafts
                     assert not both1['incomplete']
                     assert not both2['incomplete']
                     assert not bothincompletediverge
                 for f in remainder:
                     assert f not in bothdiverge
                     ic = remainder[f]
                     if ic[0] in (m1 if dirtyc1 else m2):
                         # backed-out rename on one side, but watch out for deleted files
                         bothdiverge[f] = ic
                 for of, fl in bothdiverge.items():
                     if len(fl) == 2 and fl[0] == fl[1]:
                         copy[fl[0]] = of # not actually divergent, just matching renames
                 if fullcopy and repo.ui.debugflag:
                     repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                                   "% = renamed and deleted):\n")
                     for f in sorted(fullcopy):
                         note = ""
                         if f in copy:
                             note += "*"
                         if f in divergeset:
                             note += "!"
                         if f in renamedeleteset:
                             note += "%"
                         repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                                           note))
                 del divergeset
                 if not fullcopy:
                     return copy, {}, diverge, renamedelete, {}
                 repo.ui.debug("  checking for directory renames\n")
                 # generate a directory move map
                 d1, d2 = c1.dirs(), c2.dirs()
                 # Hack for adding '', which is not otherwise added, to d1 and d2
                 d1.addpath('/')
                 d2.addpath('/')
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in fullcopy.iteritems():
                     dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif dsrc in d1 and ddst in d1:
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc)
                     elif dsrc in d2 and ddst in d2:
                         # directory wasn't entirely moved remotely
                         invalid.add(dsrc)
                     elif dsrc in dirmove and dirmove[dsrc] != ddst:
                         # files from the same directory moved to two different places
                         invalid.add(dsrc)
                     else:
                         # looks good so far
                         dirmove[dsrc] = ddst
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del d1, d2, invalid
                 if not dirmove:
                     return copy, {}, diverge, renamedelete, {}
                 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
                 for d in dirmove:
                     repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                                   (d, dirmove[d]))
                 movewithdir = {}
                 # check unaccounted nonoverlapping files against directory moves
                 for f in u1r + u2r:
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d):]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(("   pending file src: '%s' -> "
                                                    "dst: '%s'\n") % (f, df))
                                 break
                 return copy, movewithdir, diverge, renamedelete, dirmove
             def _heuristicscopytracing(repo, c1, c2, base):
                 """ Fast copytracing using filename heuristics
                 Assumes that moves or renames are of following two types:
 ) Inside a directory only (same directory name but different filenames)
 ) Move from one directory to another
                                 (same filenames but different directory names)
                 Works only when there are no merge commits in the "source branch".
                 Source branch is commits from base up to c2 not including base.
                 If merge is involved it fallbacks to _fullcopytracing().
                 Can be used by setting the following config:
                     [experimental]
                     copytrace = heuristics
                 In some cases the copy/move candidates found by heuristics can be very large
                 in number and that will make the algorithm slow. The number of possible
                 candidates to check can be limited by using the config
                 `experimental.copytrace.movecandidateslimit` which defaults to 100.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c2.rev() is None:
                     c2 = c2.p1()
                 copies = {}
                 changedfiles = set()
                 m1 = c1.manifest()
                 if not repo.revs('%d::%d', base.rev(), c2.rev()):
                     # If base is not in c2 branch, we switch to fullcopytracing
                     repo.ui.debug("switching to full copytracing as base is not "
                                   "an ancestor of c2\n")
                     return _fullcopytracing(repo, c1, c2, base)
                 ctx = c2
                 while ctx != base:
                     if len(ctx.parents()) == 2:
                         # To keep things simple let's not handle merges
                         repo.ui.debug("switching to full copytracing because of merges\n")
                         return _fullcopytracing(repo, c1, c2, base)
                     changedfiles.update(ctx.files())
                     ctx = ctx.p1()
                 cp = _forwardcopies(base, c2)
                 for dst, src in cp.iteritems():
                     if src in m1:
                         copies[dst] = src
                 # file is missing if it isn't present in the destination, but is present in
                 # the base and present in the source.
                 # Presence in the base is important to exclude added files, presence in the
                 # source is important to exclude removed files.
                 filt = lambda f: f not in m1 and f in base and f in c2
                 missingfiles = [f for f in changedfiles if filt(f)]
                 if missingfiles:
                     basenametofilename = collections.defaultdict(list)
                     dirnametofilename = collections.defaultdict(list)
                     for f in m1.filesnotin(base.manifest()):
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         basenametofilename[basename].append(f)
                         dirnametofilename[dirname].append(f)
                     for f in missingfiles:
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         samebasename = basenametofilename[basename]
                         samedirname = dirnametofilename[dirname]
                         movecandidates = samebasename + samedirname
                         # f is guaranteed to be present in c2, that's why
                         # c2.filectx(f) won't fail
                         f2 = c2.filectx(f)
                         # we can have a lot of candidates which can slow down the heuristics
                         # config value to limit the number of candidates moves to check
                         maxcandidates = repo.ui.configint('experimental',
                                                           'copytrace.movecandidateslimit')
                         if len(movecandidates) > maxcandidates:
                             repo.ui.status(_("skipping copytracing for '%s', more "
                                              "candidates than the limit: %d\n")
                                            % (f, len(movecandidates)))
                             continue
                         for candidate in movecandidates:
                             f1 = c1.filectx(candidate)
                             if _related(f1, f2):
                                 # if there are a few related copies then we'll merge
                                 # changes into all of them. This matches the behaviour
                                 # of upstream copytracing
                                 copies[candidate] = f
                 return copies, {}, {}, {}, {}
             def _related(f1, f2):
                 """return True if f1 and f2 filectx have a common ancestor
                 Walk back to common ancestor to see if the two files originate
                 from the same file. Since workingfilectx's rev() is None it messes
                 up the integer comparison logic, hence the pre-step check for
                 None (f1 and f2 can only be workingfilectx's initially).
                 """
                 if f1 == f2:
                     return True # a match
                 g1, g2 = f1.ancestors(), f2.ancestors()
                 try:
                     f1r, f2r = f1.linkrev(), f2.linkrev()
                     if f1r is None:
                         f1 = next(g1)
                     if f2r is None:
                         f2 = next(g2)
                     while True:
                         f1r, f2r = f1.linkrev(), f2.linkrev()
                         if f1r > f2r:
                             f1 = next(g1)
                         elif f2r > f1r:
                             f2 = next(g2)
                         else: # f1 and f2 point to files in the same linkrev
                             return f1 == f2 # true if they point to the same file
                 except StopIteration:
                     return False
             def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
                 """
                 check possible copies of f from msrc to mdst
                 srcctx = starting context for f in msrc
                 dstctx = destination context for f in mdst
                 f = the filename to check (as in msrc)
                 base = the changectx used as a merge base
                 tca = topological common ancestor for graft-like scenarios
                 remotebase = True if base is outside tca::srcctx, False otherwise
                 limit = the rev number to not search beyond
                 data = dictionary of dictionary to store copy data. (see mergecopies)
                 note: limit is only an optimization, and provides no guarantee that
                 irrelevant revisions will not be visited
                 there is no easy way to make this algorithm stop in a guaranteed way
                 once it "goes behind a certain revision".
                 """
                 msrc = srcctx.manifest()
                 mdst = dstctx.manifest()
                 mb = base.manifest()
                 mta = tca.manifest()
                 # Might be true if this call is about finding backward renames,
                 # This happens in the case of grafts because the DAG is then rotated.
                 # If the file exists in both the base and the source, we are not looking
                 # for a rename on the source side, but on the part of the DAG that is
                 # traversed backwards.
                 #
                 # In the case there is both backward and forward renames (before and after
                 # the base) this is more complicated as we must detect a divergence.
                 # We use 'backwards = False' in that case.
                 backwards = not remotebase and base != tca and f in mb
                 getsrcfctx = _makegetfctx(srcctx)
                 getdstfctx = _makegetfctx(dstctx)
                 if msrc[f] == mb.get(f) and not remotebase:
                     # Nothing to merge
                     return
                 of = None
                 seen = {f}
                 for oc in getsrcfctx(f, msrc[f]).ancestors():
                     of = oc.path()
                     if of in seen:
                         # check limit late - grab last rename before
                         if oc.linkrev() < limit:
                             break
                         continue
                     seen.add(of)
                     # remember for dir rename detection
                     if backwards:
                         data['fullcopy'][of] = f # grafting backwards through renames
                     else:
                         data['fullcopy'][f] = of
                     if of not in mdst:
                         continue # no match, keep looking
                     if mdst[of] == mb.get(of):
                         return # no merge needed, quit early
                     c2 = getdstfctx(of, mdst[of])
                     # c2 might be a plain new file on added on destination side that is
                     # unrelated to the droids we are looking for.
                     cr = _related(oc, c2)
                     if cr and (of == f or of == c2.path()): # non-divergent
                         if backwards:
                             data['copy'][of] = f
                         elif of in mb:
                             data['copy'][f] = of
                         elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                             data['copy'][of] = f
                             del data['fullcopy'][f]
                             data['fullcopy'][of] = f
                         else: # divergence w.r.t. graft CA on one side of topological CA
                             for sf in seen:
                                 if sf in mb:
                                     assert sf not in data['diverge']
                                     data['diverge'][sf] = [f, of]
                                     break
                         return
                 if of in mta:
                     if backwards or remotebase:
                         data['incomplete'][of] = f
                     else:
                         for sf in seen:
                             if sf in mb:
                                 if tca == base:
                                     data['diverge'].setdefault(sf, []).append(f)
                                 else:
                                     data['incompletediverge'][sf] = [of, f]
                                 return
             def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
                 """reproduce copies from fromrev to rev in the dirstate
                 If skiprev is specified, it's a revision that should be used to
                 filter copy records. Any copies that occur between fromrev and
                 skiprev will not be duplicated, even if they appear in the set of
                 copies between fromrev and rev.
                 """
                 exclude = {}
                 ctraceconfig = repo.ui.config('experimental', 'copytrace')
                 bctrace = stringutil.parsebool(ctraceconfig)
                 if (skiprev is not None and
                     (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
                     # copytrace='off' skips this line, but not the entire function because
                     # the line below is O(size of the repo) during a rebase, while the rest
                     # of the function is much faster (and is required for carrying copy
                     # metadata across the rebase anyway).
                     exclude = pathcopies(repo[fromrev], repo[skiprev])
                 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
                     # copies.pathcopies returns backward renames, so dst might not
                     # actually be in the dirstate
                     if dst in exclude:
                         continue
                     wctx[dst].markcopied(src)