upstream/mercurial-mirror Commit - r41268:f3f5bfbf

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import collections

10

import collections

11

import heapq

11

import heapq

12

import os

12

import os

13

14

from .i18n import _

14

from .i18n import _

15

16

from . import (

16

from . import (

17

match as matchmod,

17

match as matchmod,

18

node,

18

node,

19

pathutil,

19

pathutil,

20

scmutil,

20

scmutil,

21

util,

21

util,

22

)

22

)

23

from .utils import (

23

from .utils import (

24

stringutil,

24

stringutil,

25

)

25

)

26

27

def _findlimit(repo, a, b):
    """Return the lowest revision copy tracing between a and b must visit.

    The result is the earliest revision number that is an ancestor of
    exactly one of ``a`` and ``b``, so that a full transitive closure of
    file copies can be computed. When one of the two is a direct descendant
    of the other, the smaller of the two revision numbers is returned
    instead.

    ``a`` and ``b`` are revision numbers; ``None`` stands for the working
    directory and is replaced by ``node.wdirrev``.

    Returns None if the walk never meets a revision reachable from both
    sides (no common ancestor was seen).
    """

    # Outline of the algorithm:
    # - tag a and b with opposite sides (-1 and 1)
    # - a parent whose children all share a side inherits that side;
    #   otherwise it belongs to no side (0)
    # - walk the graph from high to low revisions using a heap of
    #   negated revision numbers:
    #   - register parents we have not seen yet
    #   - clear the side of a parent reached from both sides
    #   - count how many queued revs may still belong to a side
    #   - remember the lowest one-sided rev popped so far
    #   - stop as soon as that count drops to zero

    changelog = repo.changelog
    wdir = node.wdirrev
    if a is None:
        a = wdir
    if b is None:
        b = wdir

    sides = {a: -1, b: 1}
    pending = [-a, -b]
    heapq.heapify(pending)
    remaining = len(pending)
    sawcommon = False
    lowest = wdir

    while remaining:
        rev = -heapq.heappop(pending)
        if rev == wdir:
            # the working directory has no changelog entry; use the
            # dirstate parents instead
            parentrevs = [changelog.rev(n) for n in repo.dirstate.parents()]
        else:
            parentrevs = changelog.parentrevs(rev)
        for prev in parentrevs:
            if prev < 0:
                continue
            if prev in sides:
                if sides[prev] and sides[prev] != sides[rev]:
                    # prev looked one-sided, but both sides reach it
                    sides[prev] = 0
                    remaining -= 1
                    sawcommon = True
                continue
            # first visit of prev: inherit the child's side and queue it
            sides[prev] = sides[rev]
            if sides[prev]:
                remaining += 1
            heapq.heappush(pending, -prev)
        if sides[rev]:
            lowest = rev  # lowest one-sided rev visited so far
            remaining -= 1

    if not sawcommon:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it
    #    'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be
    # 2, yet the filelog has the copy information in rev 1 and we will not
    # look back far enough unless we also consider a and b as candidates.
    # This only occurs when a is a descendant of b or vice versa.
    return min(lowest, a, b)

110

111

def _chain(src, dst, a, b):
    """Chain two sets of copies a->b into a single mapping.

    ``a`` and ``b`` are dicts mapping a destination name to its source
    name; ``b`` describes the step that follows ``a``. ``src`` and ``dst``
    are only used for membership tests (``name in src`` / ``name in dst``).

    Returns a new dict; neither ``a`` nor ``b`` is modified.
    """
    t = a.copy()
    # items() works on both Python 2 and Python 3 (iteritems() does not
    # exist on Python 3 dicts) and matches the second loop below.
    for k, v in b.items():
        if v in t:
            # found a chain
            if t[v] != k:
                # file wasn't renamed back to itself
                t[k] = t[v]
            if v not in dst:
                # chain was a rename, not a copy
                del t[v]
        if v in src:
            # file is a copy of an existing file
            t[k] = v

    # remove criss-crossed copies; iterate over a snapshot because entries
    # are deleted while looping
    for k, v in list(t.items()):
        if k in src and v in dst:
            del t[k]

    return t

133

134

def _tracefile(fctx, am, limit=node.nullrev):
    """Find the ancestor of fctx that is recorded in ancestor manifest am.

    Walks ``fctx.ancestors()`` and returns the first file context whose
    path maps, in ``am``, to that context's file node. When ``limit`` is
    non-negative the walk stops (returning None) at the first ancestor that
    is not introduced after ``limit``. Returns None as well when the
    ancestors are exhausted without a match.
    """
    for candidate in fctx.ancestors():
        recorded = am.get(candidate.path(), None)
        if recorded == candidate.filenode():
            return candidate
        if limit >= 0 and not candidate.isintroducedafter(limit):
            return None

143

144

def _dirstatecopies(d, match=None):
    """Return a filtered copy of the copies recorded in the dirstate.

    An entry is kept only when the dirstate value for its key is one of
    the characters 'a', 'n' or 'm' and, if ``match`` is given, ``match``
    accepts the key. The dirstate's own mapping is left untouched.
    """
    dirstate = d._repo.dirstate
    copies = dirstate.copies().copy()
    # iterate over a snapshot of the keys since entries are removed
    for dest in list(copies):
        keep = dirstate[dest] in 'anm' and (not match or match(dest))
        if not keep:
            del copies[dest]
    return copies

151

152

def _computeforwardmissing(a, b, match=None):
    """Compute the files present in b's manifest but absent from a's.

    Kept as a separate function so that extensions can wrap it to observe
    which files _forwardcopies is about to process.
    """
    basemanifest = a.manifest()
    targetmanifest = b.manifest()
    return targetmanifest.filesnotin(basemanifest, match=match)

160

161

def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a dict mapping each file that is in ``b`` but not in ``a``
    (restricted by ``match``) to the path of the ancestor file found in
    ``a``'s manifest; files with no such ancestor are left out.
    """
    # Files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that.
    # NOTE(review): the debug message literals below are reproduced exactly
    # as seen; confirm their padding against upstream.
    repo = a._repo
    ui = repo.ui
    debug = ui.debugflag and ui.configbool('devel', 'debug.copies')
    dbg = ui.debug
    if debug:
        dbg('debug.copies: looking into rename from %s to %s\n'
            % (a, b))
    limit = _findlimit(repo, a.rev(), b.rev())
    if limit is None:
        limit = node.nullrev
    if debug:
        dbg('debug.copies: search limit: %d\n' % limit)
    basemanifest = a.manifest()

    # Find where new files came from.
    # We currently don't try to find where old files went (too expensive),
    # which means we can miss a case like 'hg rm b; hg cp a b'.
    copies = {}

    # Computing the forward missing is quite expensive on large manifests,
    # since it compares the entire manifests. We can optimize it in the
    # common use case of computing what copies are in a commit versus its
    # parent (like during a rebase or histedit). Note, we exclude merge
    # commits from this optimization, since the ctx.files() for a merge
    # commit is not correct for this comparison.
    missingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        touched = scmutil.matchfiles(a._repo, b.files())
        missingmatch = matchmod.intersectmatchers(match, touched)
    missing = _computeforwardmissing(a, b, match=missingmatch)

    ancestry = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg('debug.copies: missing file to search: %d\n' % len(missing))

    for path in missing:
        if debug:
            dbg('debug.copies: tracing file: %s\n' % path)
        fctx = b[path]
        # share one ancestry context between all traced files
        fctx._ancestrycontext = ancestry

        if debug:
            start = util.timer()
        origin = _tracefile(fctx, basemanifest, limit)
        if origin:
            if debug:
                dbg('debug.copies: rename of: %s\n' % origin._path)
            copies[path] = origin.path()
        if debug:
            dbg('debug.copies: time: %f seconds\n'
                % (util.timer() - start))
    return copies

217

218

def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b

    ``match`` is first narrowed through the repository's narrowmatch().
    When ``b`` is the working copy (``b.rev()`` is None), the copies
    recorded in the dirstate are used, chained after the committed copies
    between ``a`` and the working copy's first parent when ``a`` is not
    that parent.
    """
    match = a.repo().narrowmatch(match)

    if b.rev() is not None:
        return _committedforwardcopies(a, b, match)

    # b is the working copy
    if a == b.p1():
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(b, match)

    committed = _committedforwardcopies(a, b.p1(), match)
    # combine with the copies recorded in the dirstate
    uncommitted = _dirstatecopies(b, match)
    return _chain(a, b, committed, uncommitted)

232

233

def _backwardrenames(a, b):
    """Return a {source: destination} mapping of renames, going backward.

    The forward copies from ``b`` to ``a`` are computed and inverted;
    entries whose source still exists in ``a`` are plain copies, not
    renames, and are dropped. Returns an empty dict when the
    ``experimental.copytrace`` config value is 'off'.
    """
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    f = _forwardcopies(b, a)
    r = {}
    # items() works on both Python 2 and Python 3; sorted() materializes
    # the pairs anyway, and makes the 1:n pick deterministic (the last
    # destination in sorted order wins).
    for k, v in sorted(f.items()):
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r

248

249

def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    Depending on how ``x`` and ``y`` relate through ``y.ancestor(x)``, the
    search goes forward (x is the ancestor), backward (y is the ancestor)
    or combines a backward search from x to the ancestor with a forward
    search from the ancestor to y. Returns an empty dict when x equals y
    or either of them is falsy.
    """
    repo = x._repo
    ui = repo.ui
    debug = ui.debugflag and ui.configbool('devel', 'debug.copies')
    if debug:
        ui.debug('debug.copies: searching copies from %s to %s\n'
                 % (x, y))
    if x == y or not x or not y:
        return {}

    anc = y.ancestor(x)
    if anc == x:
        if debug:
            ui.debug('debug.copies: search mode: forward\n')
        return _forwardcopies(x, y, match=match)
    if anc == y:
        if debug:
            ui.debug('debug.copies: search mode: backward\n')
        return _backwardrenames(x, y)

    if debug:
        ui.debug('debug.copies: search mode: combined\n')
    backward = _backwardrenames(x, anc)
    forward = _forwardcopies(anc, y, match=match)
    return _chain(x, y, backward, forward)

271

272

def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
    """Compute the files exclusive to each side from the two added sets.

    Returns a pair of sorted lists: the names only in ``addedinm1`` and the
    names only in ``addedinm2``. Each non-empty list is also reported
    through ``repo.ui.debug``.

    This is its own function so extensions can easily wrap this call to
    see what files mergecopies is about to process. ``c1`` and ``c2`` are
    not used here, but they are useful to such extensions for reading the
    file nodes of the changed files.

    "baselabel" can be passed to help distinguish the multiple computations
    done in the graft case.
    """
    onlylocal = sorted(addedinm1 - addedinm2)
    onlyother = sorted(addedinm2 - addedinm1)

    # NOTE(review): literals reproduced exactly as seen; confirm the
    # indentation padding inside them against upstream.
    header = " unmatched files in %s"
    if baselabel:
        header += ' (from %s)' % baselabel

    # report the local side first, then the other side
    for side, names in (('local', onlylocal), ('other', onlyother)):
        if names:
            repo.ui.debug("%s:\n %s\n" % (header % side, "\n ".join(names)))

    return onlylocal, onlyother

295

296

def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    The returned callable takes a file name and a file node and builds the
    matching file context; it is wrapped in util.lrucachefunc.

    The builder has to be set up afresh for each '_checkcopies' run so
    that linkrev adjustment is configured correctly for it. Linkrev
    adjustment matters to avoid bugs in rename detection, and a proper
    '_ancestrycontext' keeps its cost bounded: without it every file could
    trigger a full DAG traversal and make the time complexity of the
    operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    ctxrev = ctx.rev()
    repo = ctx._repo
    ancestry = getattr(ctx, '_ancestrycontext', None)
    if ancestry is None:
        if ctxrev is None:
            # no revision number: start from the context's parents
            startrevs = [p.rev() for p in ctx.parents()]
        else:
            startrevs = [ctxrev]
        ancestry = repo.changelog.ancestors(startrevs, inclusive=True)
        # remember it on the context so later calls reuse it
        ctx._ancestrycontext = ancestry

    def buildfctx(path, filenode):
        if filenode not in node.wdirfilenodeids:
            fctx = repo.filectx(path, fileid=filenode)
            # setup only needed for a filectx not created from a changectx
            fctx._ancestrycontext = ancestry
            fctx._descendantrev = ctxrev
            return fctx
        # working-context file node
        if ctx.rev() is None:
            return ctx.filectx(path)
        return repo[None][path]

    return util.lrucachefunc(buildfctx)

330

331

def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
    """combine partial copy paths

    For every key shared by ``copyfrom`` and ``copyto``, record
    ``finalcopy[copyto[key]] = copyfrom[key]`` and remove the key from
    ``copyto``. Then, for every entry of ``incompletediverge`` whose first
    element is still a key of ``copyto``, store a completed two-element
    list in ``diverge``.

    ``copyto``, ``finalcopy`` and ``diverge`` are updated in place. Returns
    a dict of the ``incompletediverge`` entries that could not be
    completed.
    """
    for path in copyfrom:
        if path not in copyto:
            continue
        target = copyto[path]
        finalcopy[target] = copyfrom[path]
        del copyto[path]

    unresolved = {}
    for path in incompletediverge:
        assert path not in diverge
        pair = incompletediverge[path]
        head = pair[0]
        if head in copyto:
            diverge[path] = [copyto[head], pair[1]]
        else:
            unresolved[path] = pair
    return unresolved

346

347

def mergecopies(repo, c1, c2, base):
    """
    Dispatch to the copy tracing algorithm selected by the configuration
    and find the moves and copies between contexts c1 and c2 that are
    relevant for merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc. to
    merge files that were moved/copied in one merge parent and modified in
    another. For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt
    in revision 4, and if the user has copytrace disabled, we print the
    following message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete"
    and "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source, present in one context but not the other,
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir
    renames. This is needed for handling changes to new files previously
    grafted into renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    setting = repo.ui.config('experimental', 'copytrace')
    enabled = stringutil.parsebool(setting)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept
    # across a rebase.
    if setting == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which
        # could be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)

    if enabled is False:
        # stringutil.parsebool() returns None when it is unable to parse
        # the value, so only an explicit False disables copy tracing; any
        # unparsable value falls through to full copy tracing below
        return {}, {}, {}, {}, {}

    return _fullcopytracing(repo, c1, c2, base)

419

420

def _isfullcopytraceable(repo, c1, base):
    """Tell whether the full copytrace algorithm should be used.

    The answer is True only when both ``c1`` (or its first parent, when
    ``c1`` has no revision number) and ``base`` are mutable, and the number
    of changesets in the ``base::c1`` range is below the limit; the full
    algorithm offers increased capabilities and is expected to be fast
    enough in that case.

    `experimental.copytrace.sourcecommitlimit` sets that limit: when the
    range holds at least that many changesets, the full copytracing
    algorithm won't be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        return False
    limit = repo.ui.configint('experimental',
                              'copytrace.sourcecommitlimit')
    span = repo.revs('%d::%d', base.rev(), c1.rev())
    return len(span) < limit

437

438

def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple ``(copy, movewithdir, diverge, renamedelete, dirmove)``
    of dictionaries. Five empty dictionaries are returned when _findlimit()
    reports no limit revision; ``movewithdir`` and ``dirmove`` are empty when
    no copies or no directory moves were detected.
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden. We still need to know a real common ancestor in this case. We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.isancestorof(wc) is False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not base.isancestorof(_c1)
    dirtyc2 = not base.isancestorof(_c2)
    graft = dirtyc1 or dirtyc2
    # tca is the topological common ancestor; it only differs from base in
    # the graft case
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}, {}
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    #
    # data1 and data2 hold the per-side results; both reference the very same
    # 'diverge' and 'incompletediverge' dictionaries.
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    # files added (relative to base) on both sides
    bothnew = sorted(addedinm1 & addedinm2)
    # u1r/u2r are computed relative to base, u1u/u2u relative to tca; the two
    # pairs are the same objects when tca == base
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2,
                                      m1.filesnotin(mta, repo.narrowmatch()),
                                      m2.filesnotin(mta, repo.narrowmatch()),
                                      baselabel='topological common ancestor')

    # trace each unmatched file on its own side; results land in data1/data2
    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    # merge the per-side results; on a key clash the c2 side wins
    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    # the 'incomplete' dict of the dirty side is passed second
    # NOTE(review): _combinecopies is defined outside this view; it is
    # presumably updating copy/diverge/incompletediverge in place -- confirm
    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    else:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    # iterate over a snapshot since entries are deleted from diverge below
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    # files new on both sides get their own scratch dictionaries so that
    # their divergence data is kept apart from 'diverge' above
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        # look the first entry up in the manifest of the dirty side
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    # divergeset was only needed for the debug output above
    del divergeset

    if not fullcopy:
        # no copies at all, so there can be no directory moves either
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    # source directories that turned out not to be a clean move
    invalid = set()
    # candidate moves: source directory -> destination directory
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    # a directory may have been recorded in dirmove before it was found to
    # be invalid, so purge those entries now
    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # append a trailing slash so the startswith() test below only matches
    # whole path components
    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    # only the first matching directory move is considered
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove

672

673

def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
                    (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns a 5-tuple whose first element is the ``{destination: source}``
    copies dictionary; the remaining four elements are always empty
    dictionaries (unless the call was delegated to _fullcopytracing()).
    """

    # contexts without a revision number are replaced by their first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    # walk first parents from c2 down to base, collecting every touched file
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # copies recorded between base and c2 are kept when their source still
    # exists in c1
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = [f for f in changedfiles
                    if f not in m1 and f in base and f in c2]

    if missingfiles:
        # index the files added in c1 (relative to base) by basename and by
        # directory name; these are the possible rename targets
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (the value does not depend on the file, so read it only once)
        maxcandidates = repo.ui.configint('experimental',
                                          'copytrace.movecandidateslimit')

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}

773

774

def _related(f1, f2):

774

def _related(f1, f2):

775

"""return True if f1 and f2 filectx have a common ancestor

775

"""return True if f1 and f2 filectx have a common ancestor

776

777

Walk back to common ancestor to see if the two files originate

777

Walk back to common ancestor to see if the two files originate

778

from the same file. Since workingfilectx's rev() is None it messes

778

from the same file. Since workingfilectx's rev() is None it messes

779

up the integer comparison logic, hence the pre-step check for

779

up the integer comparison logic, hence the pre-step check for

780

None (f1 and f2 can only be workingfilectx's initially).

780

None (f1 and f2 can only be workingfilectx's initially).

781

"""

781

"""

782

783

if f1 == f2:

783

if f1 == f2:

784

return f1 # a match

784

return f1 # a match

785

786

g1, g2 = f1.ancestors(), f2.ancestors()

786

g1, g2 = f1.ancestors(), f2.ancestors()

787

try:

787

try:

788

f1r, f2r = f1.linkrev(), f2.linkrev()

788

f1r, f2r = f1.linkrev(), f2.linkrev()

789

790

if f1r is None:

790

if f1r is None:

791

f1 = next(g1)

791

f1 = next(g1)

792

if f2r is None:

792

if f2r is None:

793

f2 = next(g2)

793

f2 = next(g2)

794

795

while True:

795

while True:

796

f1r, f2r = f1.linkrev(), f2.linkrev()

796

f1r, f2r = f1.linkrev(), f2.linkrev()

797

if f1r > f2r:

797

if f1r > f2r:

798

f1 = next(g1)

798

f1 = next(g1)

799

elif f2r > f1r:

799

elif f2r > f1r:

800

f2 = next(g2)

800

f2 = next(g2)

801

else: # f1 and f2 point to files in the same linkrev

801

else: # f1 and f2 point to files in the same linkrev

802

return f1 == f2 # true if they point to the same file

802

return f1 == f2 # true if they point to the same file

803

except StopIteration:

803

except StopIteration:

804

return False

804

return False

805

806

def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".

    Nothing is returned; results are written into the 'fullcopy', 'copy',
    'diverge', 'incomplete' and 'incompletediverge' entries of data.
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    # NOTE(review): _makegetfctx is defined outside this view; it is called
    # below as getter(path, filenode) and its result is used as a filectx
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    # of = path of the ancestor currently examined ("old file"); it stays
    # None when the ancestor walk below yields nothing
    of = None
    # every path already encountered while walking the ancestors of f
    seen = {f}
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if oc.linkrev() < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                # flip the direction of the fullcopy entry recorded above
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                # record the divergence under the first seen path that
                # exists in the merge base
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

    # reached when the ancestor walk ended (or hit the limit) without a
    # conclusive match in the destination
    if of in mta:
        if backwards or remotebase:
            data['incomplete'][of] = f
        else:
            for sf in seen:
                if sf in mb:
                    if tca == base:
                        data['diverge'].setdefault(sf, []).append(f)
                    else:
                        data['incompletediverge'][sf] = [of, f]
                    return

898

899

def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    Every copy reported by pathcopies() between ``fromrev`` and ``rev`` is
    recorded on the matching file of ``wctx`` through ``markcopied()``.

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config('experimental', 'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    if (skiprev is not None and
        (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst in exclude:
            continue
        wctx[dst].markcopied(src)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import collections
             import heapq
             import os
             from .i18n import _
             from . import (
                 match as matchmod,
                 node,
                 pathutil,
                 scmutil,
                 util,
             )
             from .utils import (
                 stringutil,
             )
             def _findlimit(repo, a, b):
                 """
                 Find the last revision that needs to be checked to ensure that a full
                 transitive closure for file copies can be properly calculated.
                 Generally, this means finding the earliest revision number that's an
                 ancestor of a or b but not both, except when a or b is a direct descendant
                 of the other, in which case we can return the minimum revnum of a and b.
                 None if no such revision exists.

                 ``a`` and ``b`` are revision numbers; ``None`` is replaced by
                 ``node.wdirrev`` (the working directory), whose parents are read from
                 the dirstate instead of the changelog.

                 Returns ``None`` when the walk never finds a revision reachable from
                 both sides, otherwise ``min(limit, a, b)``.
                 """
                 # basic idea:
                 # - mark a and b with different sides
                 # - if a parent's children are all on the same side, the parent is
                 #   on that side, otherwise it is on no side
                 # - walk the graph in topological order with the help of a heap;
                 #   - add unseen parents to side map
                 #   - clear side of any parent that has children on different sides
                 #   - track number of interesting revs that might still be on a side
                 #   - track the lowest interesting rev seen
                 #   - quit when interesting revs is zero
                 cl = repo.changelog
                 if a is None:
                     a = node.wdirrev
                 if b is None:
                     b = node.wdirrev
                 # -1 / 1 identify the two sides; 0 means "seen from both sides"
                 side = {a: -1, b: 1}
                 # revisions are stored negated so that heapq (a min-heap) pops the
                 # highest revision number first
                 visit = [-a, -b]
                 heapq.heapify(visit)
                 interesting = len(visit)
                 hascommonancestor = False
                 limit = node.wdirrev
                 while interesting:
                     r = -heapq.heappop(visit)
                     if r == node.wdirrev:
                         # the working directory has no changelog entry
                         parents = [cl.rev(p) for p in repo.dirstate.parents()]
                     else:
                         parents = cl.parentrevs(r)
                     for p in parents:
                         if p < 0:
                             # negative parent revision: nothing to walk
                             continue
                         if p not in side:
                             # first time we see p; add it to visit
                             side[p] = side[r]
                             if side[p]:
                                 interesting += 1
                             heapq.heappush(visit, -p)
                         elif side[p] and side[p] != side[r]:
                             # p was interesting but now we know better
                             side[p] = 0
                             interesting -= 1
                             hascommonancestor = True
                     if side[r]:
                         limit = r # lowest rev visited
                         interesting -= 1
                 if not hascommonancestor:
                     return None
                 # Consider the following flow (see test-commit-amend.t under issue4405):
                 # 1/ File 'a0' committed
                 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
                 # 3/ Move back to first commit
                 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
                 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
                 #
                 # During the amend in step five, we will be in this state:
                 #
                 # @  3 temporary amend commit for a1-amend
                 # |
                 # o  2 a1-amend
                 # |
                 # | o  1 a1
                 # |/
                 # o  0 a0
                 #
                 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
                 # yet the filelog has the copy information in rev 1 and we will not look
                 # back far enough unless we also look at the a and b as candidates.
                 # This only occurs when a is a descendant of b or vice versa.
                 return min(limit, a, b)
             def _chain(src, dst, a, b):
                 """chain two sets of copies a->b"""
                 t = a.copy()
                 for k, v in b.iteritems():
                     if v in t:
                         # found a chain
                         if t[v] != k:
                             # file wasn't renamed back to itself
                             t[k] = t[v]
                         if v not in dst:
                             # chain was a rename, not a copy
                             del t[v]
                     if v in src:
                         # file is a copy of an existing file
                         t[k] = v
                 # remove criss-crossed copies
                 for k, v in list(t.items()):
                     if k in src and v in dst:
                         del t[k]
                 return t
-            def _tracefile(fctx, am, limit=-1):
+            def _tracefile(fctx, am, limit=node.nullrev):
                 """return file context that is the ancestor of fctx present in ancestor
                 manifest am, stopping after the first ancestor lower than limit"""
                 for f in fctx.ancestors():
                     if am.get(f.path(), None) == f.filenode():
                         return f
                     if limit >= 0 and not f.isintroducedafter(limit):
                         return None
             def _dirstatecopies(d, match=None):
                 ds = d._repo.dirstate
                 c = ds.copies().copy()
                 for k in list(c):
                     if ds[k] not in 'anm' or (match and not match(k)):
                         del c[k]
                 return c
             def _computeforwardmissing(a, b, match=None):
                 """Computes which files are in b but not a.
                 This is its own function so extensions can easily wrap this call to see what
                 files _forwardcopies is about to process.
                 """
                 ma = a.manifest()
                 mb = b.manifest()
                 return mb.filesnotin(ma, match=match)
             def _committedforwardcopies(a, b, match):
                 """Like _forwardcopies(), but b.rev() cannot be None (working copy)

                 Returns a {file in b: path of its traced ancestor in a} dict, built
                 by tracing every file that is in b but not in a (restricted by
                 ``match``) back through its file ancestors with _tracefile().
                 Timing and progress details are emitted through ``repo.ui.debug``
                 when both the debug flag and ``devel.debug.copies`` are set.
                 """
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 repo = a._repo
                 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
                 dbg = repo.ui.debug
                 if debug:
                     dbg('debug.copies:    looking into rename from %s to %s\n'
                         % (a, b))
                 limit = _findlimit(repo, a.rev(), b.rev())
                 if limit is None:
                     # no limit could be computed: fall back to the null revision
                     limit = node.nullrev
                 if debug:
                     dbg('debug.copies:      search limit: %d\n' % limit)
                 am = a.manifest()
                 # find where new files came from
                 # we currently don't try to find where old files went, too expensive
                 # this means we can miss a case like 'hg rm b; hg cp a b'
                 cm = {}
                 # Computing the forward missing is quite expensive on large manifests, since
                 # it compares the entire manifests. We can optimize it in the common use
                 # case of computing what copies are in a commit versus its parent (like
                 # during a rebase or histedit). Note, we exclude merge commits from this
                 # optimization, since the ctx.files() for a merge commit is not correct for
                 # this comparison.
                 forwardmissingmatch = match
                 if b.p1() == a and b.p2().node() == node.nullid:
                     filesmatcher = scmutil.matchfiles(repo, b.files())
                     forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
                 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
                 # shared by every filectx below so the ancestor set is built once
                 ancestrycontext = repo.changelog.ancestors([b.rev()], inclusive=True)
                 if debug:
                     dbg('debug.copies:      missing file to search: %d\n' % len(missing))
                 for f in missing:
                     if debug:
                         dbg('debug.copies:        tracing file: %s\n' % f)
                     fctx = b[f]
                     fctx._ancestrycontext = ancestrycontext
                     if debug:
                         # 'start' is only read again under the same debug guard
                         start = util.timer()
                     ofctx = _tracefile(fctx, am, limit)
                     if ofctx:
                         if debug:
                             dbg('debug.copies:          rename of: %s\n' % ofctx._path)
                         cm[f] = ofctx.path()
                     if debug:
                         dbg('debug.copies:          time: %f seconds\n'
                             % (util.timer() - start))
                 return cm
             def _forwardcopies(a, b, match=None):
                 """Compute the {dst@b: src@a} copy mapping, a being an ancestor of b.

                 ``b`` may be the working copy (``b.rev()`` is None); in that case the
                 dirstate copy records are used, chained after the committed copies up
                 to ``b.p1()`` when ``a`` is not that parent.
                 """
                 match = a.repo().narrowmatch(match)
                 if b.rev() is not None:
                     # plain committed revision
                     return _committedforwardcopies(a, b, match)
                 if a == b.p1():
                     # short-circuit to avoid issues with merge states
                     return _dirstatecopies(b, match)
                 committed = _committedforwardcopies(a, b.p1(), match)
                 # combine with the copies recorded in the dirstate
                 uncommitted = _dirstatecopies(b, match)
                 return _chain(a, b, committed, uncommitted)
             def _backwardrenames(a, b):
                 """Return a {source: destination} mapping of renames going from a back to b.

                 The forward copies from b to a are inverted, skipping every entry
                 whose source still exists in a (a copy rather than a rename). Returns
                 an empty dict when ``experimental.copytrace`` is 'off'.
                 """
                 if a._repo.ui.config('experimental', 'copytrace') == 'off':
                     return {}
                 forward = _forwardcopies(b, a)
                 # Even though we're not taking copies into account, 1:n rename
                 # situations can still exist (e.g. hg cp a b; hg mv a c). In those
                 # cases we arbitrarily pick one of the renames: entries are visited in
                 # sorted order and the last one for a given source wins.
                 return {source: dest
                         for dest, source in sorted(forward.iteritems())
                         if source not in a}
             def pathcopies(x, y, match=None):
                 """Compute the {dst@y: src@x} copy mapping for a directed compare.

                 Depending on how ``x`` and ``y`` relate through ``y.ancestor(x)``, the
                 search goes forward, backward, or backward to the ancestor and then
                 forward again. ``match`` is only handed to the forward part. An empty
                 dict is returned when the contexts are equal or either one is falsy.
                 """
                 repo = x._repo
                 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
                 if debug:
                     repo.ui.debug('debug.copies: searching copies from %s to %s\n'
                                   % (x, y))
                 if x == y or not x or not y:
                     return {}
                 a = y.ancestor(x)
                 if a == x:
                     # x is an ancestor of y
                     if debug:
                         repo.ui.debug('debug.copies: search mode: forward\n')
                     return _forwardcopies(x, y, match=match)
                 elif a == y:
                     # y is an ancestor of x
                     if debug:
                         repo.ui.debug('debug.copies: search mode: backward\n')
                     return _backwardrenames(x, y)
                 if debug:
                     repo.ui.debug('debug.copies: search mode: combined\n')
                 backward = _backwardrenames(x, a)
                 forward = _forwardcopies(a, y, match=match)
                 return _chain(x, y, backward, forward)
             def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
                 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
                 and c2. This is its own function so extensions can easily wrap this call
                 to see what files mergecopies is about to process.
                 Even though c1 and c2 are not used in this function, they are useful in
                 other extensions for being able to read the file nodes of the changed files.
                 "baselabel" can be passed to help distinguish the multiple computations
                 done in the graft case.
                 """
                 u1 = sorted(addedinm1 - addedinm2)
                 u2 = sorted(addedinm2 - addedinm1)
                 header = "  unmatched files in %s"
                 if baselabel:
                     header += ' (from %s)' % baselabel
                 if u1:
                     repo.ui.debug("%s:\n   %s\n" % (header % 'local', "\n   ".join(u1)))
                 if u2:
                     repo.ui.debug("%s:\n   %s\n" % (header % 'other', "\n   ".join(u2)))
                 return u1, u2
             def _makegetfctx(ctx):
                 """Build a cached ``getfctx(path, filenode)`` function for _checkcopies.

                 The function has to be rebuilt for each '_checkcopies' run so that
                 linkrev adjustment is set up for that run; linkrev adjustment matters
                 to avoid bugs in rename detection. Sharing one '_ancestrycontext'
                 between the produced file contexts keeps the cost of that adjustment
                 bounded; without it each file could do a full DAG traversal and make
                 the time complexity of the operation explode (see issue4537).

                 If ``ctx`` has no '_ancestrycontext' yet, one is computed and stored
                 on it.

                 This function exists here mostly to limit the impact on stable. Feel
                 free to refactor on default.
                 """
                 repo = ctx._repo
                 rev = ctx.rev()
                 ancestry = getattr(ctx, '_ancestrycontext', None)
                 if ancestry is None:
                     if rev is None:
                         # no revision number: start from the parents instead
                         startrevs = [p.rev() for p in ctx.parents()]
                     else:
                         startrevs = [rev]
                     ancestry = repo.changelog.ancestors(startrevs, inclusive=True)
                     ctx._ancestrycontext = ancestry
                 def makectx(f, n):
                     if n not in node.wdirfilenodeids:
                         fctx = repo.filectx(f, fileid=n)
                         # setup only needed for filectx not created from a changectx
                         fctx._ancestrycontext = ancestry
                         fctx._descendantrev = rev
                         return fctx
                     # working-directory file node
                     if ctx.rev() is None:
                         return ctx.filectx(f)
                     return repo[None][f]
                 return util.lrucachefunc(makectx)
             def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
                 """combine partial copy paths"""
                 remainder = {}
                 for f in copyfrom:
                     if f in copyto:
                         finalcopy[copyto[f]] = copyfrom[f]
                         del copyto[f]
                 for f in incompletediverge:
                     assert f not in diverge
                     ic = incompletediverge[f]
                     if ic[0] in copyto:
                         diverge[f] = [copyto[ic[0]], ic[1]]
                     else:
                         remainder[f] = ic
                 return remainder
             def mergecopies(repo, c1, c2, base):
                 """
                 Dispatch to the copytracing algorithm selected by configuration in
                 order to find the moves and copies between contexts c1 and c2 that are
                 relevant for merging. 'base' will be used as the merge base.

                 Copytracing is used in commands like rebase, merge, unshelve, etc to
                 merge files that were moved/copied in one merge parent and modified in
                 another. For example:

                 o          ---> 4 another commit
                 |
                 |   o      ---> 3 commit that modifies a.txt
                 |  /
                 o /        ---> 2 commit that moves a.txt to b.txt
                 |/
                 o          ---> 1 merge base

                 If we try to rebase revision 3 on revision 4, since there is no a.txt
                 in revision 4, and if the user has copytrace disabled, we print the
                 following message:

                 ```other changed <file> which local deleted```

                 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete"
                 and "dirmove".

                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.

                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.

                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.

                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.

                 "dirmove" is a mapping of detected source dir -> destination dir
                 renames. This is needed for handling changes to new files previously
                 grafted into renamed directories.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return {}, {}, {}, {}, {}
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return repo.dirstate.copies(), {}, {}, {}, {}
                 # Copy trace disabling is explicitly below the node == p1 logic above
                 # because the logic above is required for a simple copy to be kept
                 # across a rebase.
                 copytracing = repo.ui.config('experimental', 'copytrace')
                 boolctrace = stringutil.parsebool(copytracing)
                 if copytracing == 'heuristics':
                     # Do full copytracing if only non-public revisions are involved as
                     # that will be fast enough and will also cover the copies which
                     # could be missed by heuristics
                     if _isfullcopytraceable(repo, c1, base):
                         tracer = _fullcopytracing
                     else:
                         tracer = _heuristicscopytracing
                     return tracer(repo, c1, c2, base)
                 if boolctrace is False:
                     # stringutil.parsebool() returns None when it is unable to parse
                     # the value, so only an explicit False disables copytracing
                     return {}, {}, {}, {}, {}
                 return _fullcopytracing(repo, c1, c2, base)
             def _isfullcopytraceable(repo, c1, base):
                 """ Checks that if base, source and destination are all no-public branches,
                 if yes let's use the full copytrace algorithm for increased capabilities
                 since it will be fast enough.
                 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
                 number of changesets from c1 to base such that if number of changesets are
                 more than the limit, full copytracing algorithm won't be used.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c1.mutable() and base.mutable():
                     sourcecommitlimit = repo.ui.configint('experimental',
                                                           'copytrace.sourcecommitlimit')
                     commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
                     return commits < sourcecommitlimit
                 return False
             def _fullcopytracing(repo, c1, c2, base):
                 """ The full copytracing algorithm which finds all the new files that were
                 added from merge base up to the top commit and for each file it checks if
                 this file was copied from another file.
                 This is pretty slow when a lot of changesets are involved but will track all
                 the copies.
                 """
                 # In certain scenarios (e.g. graft, update or rebase), base can be
                 # overridden We still need to know a real common ancestor in this case We
                 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
                 # can be multiple common ancestors, e.g. in case of bidmerge.  Because our
                 # caller may not know if the revision passed in lieu of the CA is a genuine
                 # common ancestor or not without explicitly checking it, it's better to
                 # determine that here.
                 #
                 # base.isancestorof(wc) is False, work around that
                 _c1 = c1.p1() if c1.rev() is None else c1
                 _c2 = c2.p1() if c2.rev() is None else c2
                 # an endpoint is "dirty" if it isn't a descendant of the merge base
                 # if we have a dirty endpoint, we need to trigger graft logic, and also
                 # keep track of which endpoint is dirty
                 dirtyc1 = not base.isancestorof(_c1)
                 dirtyc2 = not base.isancestorof(_c2)
                 graft = dirtyc1 or dirtyc2
                 tca = base
                 if graft:
                     tca = _c1.ancestor(_c2)
                 limit = _findlimit(repo, c1.rev(), c2.rev())
                 if limit is None:
                     # no common ancestor, no copies
                     return {}, {}, {}, {}, {}
                 repo.ui.debug("  searching for copies back to rev %d\n" % limit)
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 mb = base.manifest()
                 # gather data from _checkcopies:
                 # - diverge = record all diverges in this dict
                 # - copy = record all non-divergent copies in this dict
                 # - fullcopy = record all copies in this dict
                 # - incomplete = record non-divergent partial copies here
                 # - incompletediverge = record divergent partial copies here
                 diverge = {} # divergence data is shared
                 incompletediverge  = {}
                 data1 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': diverge,
                          'incompletediverge': incompletediverge,
                         }
                 data2 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': diverge,
                          'incompletediverge': incompletediverge,
                         }
                 # find interesting file sets from manifests
                 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
                 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
                 bothnew = sorted(addedinm1 & addedinm2)
                 if tca == base:
                     # unmatched file from base
                     u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
                     u1u, u2u = u1r, u2r
                 else:
                     # unmatched file from base (DAG rotation in the graft case)
                     u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                                   baselabel='base')
                     # unmatched file from topological common ancestors (no DAG rotation)
                     # need to recompute this for directory move handling when grafting
                     mta = tca.manifest()
                     u1u, u2u = _computenonoverlap(repo, c1, c2,
                                                   m1.filesnotin(mta, repo.narrowmatch()),
                                                   m2.filesnotin(mta, repo.narrowmatch()),
                                                   baselabel='topological common ancestor')
                 for f in u1u:
                     _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
                 for f in u2u:
                     _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
                 copy = dict(data1['copy'])
                 copy.update(data2['copy'])
                 fullcopy = dict(data1['fullcopy'])
                 fullcopy.update(data2['fullcopy'])
                 if dirtyc1:
                     _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                                    incompletediverge)
                 else:
                     _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                                    incompletediverge)
                 renamedelete = {}
                 renamedeleteset = set()
                 divergeset = set()
                 for of, fl in list(diverge.items()):
                     if len(fl) == 1 or of in c1 or of in c2:
                         del diverge[of] # not actually divergent, or not a rename
                         if of not in c1 and of not in c2:
                             # renamed on one side, deleted on the other side, but filter
                             # out files that have been renamed and then deleted
                             renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                             renamedeleteset.update(fl) # reverse map for below
                     else:
                         divergeset.update(fl) # reverse map for below
                 if bothnew:
                     repo.ui.debug("  unmatched files new in both:\n   %s\n"
                                   % "\n   ".join(bothnew))
                 bothdiverge = {}
                 bothincompletediverge = {}
                 remainder = {}
                 both1 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': bothdiverge,
                          'incompletediverge': bothincompletediverge
                         }
                 both2 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': bothdiverge,
                          'incompletediverge': bothincompletediverge
                         }
                 for f in bothnew:
                     _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
                     _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
                 if dirtyc1:
                     # incomplete copies may only be found on the "dirty" side for bothnew
                     assert not both2['incomplete']
                     remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                                bothincompletediverge)
                 elif dirtyc2:
                     assert not both1['incomplete']
                     remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                                bothincompletediverge)
                 else:
                     # incomplete copies and divergences can't happen outside grafts
                     assert not both1['incomplete']
                     assert not both2['incomplete']
                     assert not bothincompletediverge
                 for f in remainder:
                     assert f not in bothdiverge
                     ic = remainder[f]
                     if ic[0] in (m1 if dirtyc1 else m2):
                         # backed-out rename on one side, but watch out for deleted files
                         bothdiverge[f] = ic
                 for of, fl in bothdiverge.items():
                     if len(fl) == 2 and fl[0] == fl[1]:
                         copy[fl[0]] = of # not actually divergent, just matching renames
                 if fullcopy and repo.ui.debugflag:
                     repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                                   "% = renamed and deleted):\n")
                     for f in sorted(fullcopy):
                         note = ""
                         if f in copy:
                             note += "*"
                         if f in divergeset:
                             note += "!"
                         if f in renamedeleteset:
                             note += "%"
                         repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                                           note))
                 del divergeset
                 if not fullcopy:
                     return copy, {}, diverge, renamedelete, {}
                 repo.ui.debug("  checking for directory renames\n")
                 # generate a directory move map
                 d1, d2 = c1.dirs(), c2.dirs()
                 # Hack for adding '', which is not otherwise added, to d1 and d2
                 d1.addpath('/')
                 d2.addpath('/')
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in fullcopy.iteritems():
                     dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif dsrc in d1 and ddst in d1:
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc)
                     elif dsrc in d2 and ddst in d2:
                         # directory wasn't entirely moved remotely
                         invalid.add(dsrc)
                     elif dsrc in dirmove and dirmove[dsrc] != ddst:
                         # files from the same directory moved to two different places
                         invalid.add(dsrc)
                     else:
                         # looks good so far
                         dirmove[dsrc] = ddst
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del d1, d2, invalid
                 if not dirmove:
                     return copy, {}, diverge, renamedelete, {}
                 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
                 for d in dirmove:
                     repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                                   (d, dirmove[d]))
                 movewithdir = {}
                 # check unaccounted nonoverlapping files against directory moves
                 for f in u1r + u2r:
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d):]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(("   pending file src: '%s' -> "
                                                    "dst: '%s'\n") % (f, df))
                                 break
                 return copy, movewithdir, diverge, renamedelete, dirmove
             def _heuristicscopytracing(repo, c1, c2, base):
                 """Fast copytracing using filename heuristics.

                 Assumes that moves or renames are of the following two types:

                 1) Inside a directory only (same directory name but different
                    filenames)
                 2) Move from one directory to another (same filenames but different
                    directory names)

                 Works only when there are no merge commits in the "source branch".
                 The source branch is the commits from base up to c2, not including
                 base. If a merge is involved, or if base is not an ancestor of c2,
                 this falls back to _fullcopytracing().

                 Can be used by setting the following config:

                     [experimental]
                     copytrace = heuristics

                 In some cases the copy/move candidates found by heuristics can be very
                 large in number and that will make the algorithm slow. The number of
                 possible candidates to check can be limited by using the config
                 `experimental.copytrace.movecandidateslimit` which defaults to 100.

                 Returns a 5-tuple with the same layout as _fullcopytracing()
                 (copy, movewithdir, diverge, renamedelete, dirmove). Only the first
                 element is ever populated here; the other four are empty dicts.
                 """
                 # a working-directory context has no revision number; use its
                 # first parent instead
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c2.rev() is None:
                     c2 = c2.p1()

                 copies = {}
                 changedfiles = set()
                 m1 = c1.manifest()
                 if not repo.revs('%d::%d', base.rev(), c2.rev()):
                     # If base is not in c2 branch, we switch to fullcopytracing
                     repo.ui.debug("switching to full copytracing as base is not "
                                   "an ancestor of c2\n")
                     return _fullcopytracing(repo, c1, c2, base)

                 # walk first parents from c2 down to base, collecting touched files
                 ctx = c2
                 while ctx != base:
                     if len(ctx.parents()) == 2:
                         # To keep things simple let's not handle merges
                         repo.ui.debug("switching to full copytracing because of merges\n")
                         return _fullcopytracing(repo, c1, c2, base)
                     changedfiles.update(ctx.files())
                     ctx = ctx.p1()

                 # copies recorded between base and c2 whose source still exists in c1
                 cp = _forwardcopies(base, c2)
                 for dst, src in cp.iteritems():
                     if src in m1:
                         copies[dst] = src

                 # file is missing if it isn't present in the destination, but is present in
                 # the base and present in the source.
                 # Presence in the base is important to exclude added files, presence in the
                 # source is important to exclude removed files.
                 missingfiles = [f for f in changedfiles
                                 if f not in m1 and f in base and f in c2]
                 if not missingfiles:
                     return copies, {}, {}, {}, {}

                 # we can have a lot of candidates which can slow down the heuristics
                 # config value to limit the number of candidates moves to check.
                 # The value does not depend on the file being examined, so read it
                 # once instead of once per missing file.
                 maxcandidates = repo.ui.configint('experimental',
                                                   'copytrace.movecandidateslimit')

                 # index the files that are in c1 but not in base by basename and by
                 # directory name; these are the only possible move targets
                 basenametofilename = collections.defaultdict(list)
                 dirnametofilename = collections.defaultdict(list)
                 for f in m1.filesnotin(base.manifest()):
                     basename = os.path.basename(f)
                     dirname = os.path.dirname(f)
                     basenametofilename[basename].append(f)
                     dirnametofilename[dirname].append(f)

                 for f in missingfiles:
                     basename = os.path.basename(f)
                     dirname = os.path.dirname(f)
                     samebasename = basenametofilename[basename]
                     samedirname = dirnametofilename[dirname]
                     movecandidates = samebasename + samedirname
                     # f is guaranteed to be present in c2, that's why
                     # c2.filectx(f) won't fail
                     f2 = c2.filectx(f)
                     if len(movecandidates) > maxcandidates:
                         repo.ui.status(_("skipping copytracing for '%s', more "
                                          "candidates than the limit: %d\n")
                                        % (f, len(movecandidates)))
                         continue
                     for candidate in movecandidates:
                         f1 = c1.filectx(candidate)
                         if _related(f1, f2):
                             # if there are a few related copies then we'll merge
                             # changes into all of them. This matches the behaviour
                             # of upstream copytracing
                             copies[candidate] = f
                 return copies, {}, {}, {}, {}
             def _related(f1, f2):
                 """return True if f1 and f2 filectx have a common ancestor
                 Walk back to common ancestor to see if the two files originate
                 from the same file. Since workingfilectx's rev() is None it messes
                 up the integer comparison logic, hence the pre-step check for
                 None (f1 and f2 can only be workingfilectx's initially).
                 """
                 if f1 == f2:
                     return f1 # a match
                 g1, g2 = f1.ancestors(), f2.ancestors()
                 try:
                     f1r, f2r = f1.linkrev(), f2.linkrev()
                     if f1r is None:
                         f1 = next(g1)
                     if f2r is None:
                         f2 = next(g2)
                     while True:
                         f1r, f2r = f1.linkrev(), f2.linkrev()
                         if f1r > f2r:
                             f1 = next(g1)
                         elif f2r > f1r:
                             f2 = next(g2)
                         else: # f1 and f2 point to files in the same linkrev
                             return f1 == f2 # true if they point to the same file
                 except StopIteration:
                     return False
             def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
                 """
                 Check possible copies of f from msrc to mdst and record them in data.

                 srcctx = starting context for f in msrc
                 dstctx = destination context for f in mdst
                 f = the filename to check (as in msrc)
                 base = the changectx used as a merge base
                 tca = topological common ancestor for graft-like scenarios
                 remotebase = True if base is outside tca::srcctx, False otherwise
                 limit = the rev number to not search beyond
                 data = dictionary of dictionaries to store copy data (see mergecopies).
                        This function writes to its 'copy', 'fullcopy', 'diverge',
                        'incomplete' and 'incompletediverge' entries.

                 Nothing is returned; all results are stored in data.

                 note: limit is only an optimization, and provides no guarantee that
                 irrelevant revisions will not be visited
                 there is no easy way to make this algorithm stop in a guaranteed way
                 once it "goes behind a certain revision".
                 """
                 msrc = srcctx.manifest()
                 mdst = dstctx.manifest()
                 mb = base.manifest()
                 mta = tca.manifest()
                 # Might be true if this call is about finding backward renames,
                 # This happens in the case of grafts because the DAG is then rotated.
                 # If the file exists in both the base and the source, we are not looking
                 # for a rename on the source side, but on the part of the DAG that is
                 # traversed backwards.
                 #
                 # In the case there is both backward and forward renames (before and after
                 # the base) this is more complicated as we must detect a divergence.
                 # We use 'backwards = False' in that case.
                 backwards = not remotebase and base != tca and f in mb
                 getsrcfctx = _makegetfctx(srcctx)
                 getdstfctx = _makegetfctx(dstctx)
                 # the manifest entry for f is the same in the source and in the base
                 if msrc[f] == mb.get(f) and not remotebase:
                     # Nothing to merge
                     return
                 # 'of' tracks the path of the most recently visited ancestor; it stays
                 # None when the ancestor walk below yields nothing
                 of = None
                 # every path this file has been seen under so far, starting with f
                 seen = {f}
                 for oc in getsrcfctx(f, msrc[f]).ancestors():
                     of = oc.path()
                     if of in seen:
                         # check limit late - grab last rename before
                         if oc.linkrev() < limit:
                             break
                         continue
                     seen.add(of)
                     # remember for dir rename detection
                     if backwards:
                         data['fullcopy'][of] = f # grafting backwards through renames
                     else:
                         data['fullcopy'][f] = of
                     if of not in mdst:
                         continue # no match, keep looking
                     # destination's entry for 'of' is the same as the base's
                     if mdst[of] == mb.get(of):
                         return # no merge needed, quit early
                     c2 = getdstfctx(of, mdst[of])
                     # c2 might be a plain new file added on the destination side that is
                     # unrelated to the file we are looking for.
                     cr = _related(oc, c2)
                     if cr and (of == f or of == c2.path()): # non-divergent
                         if backwards:
                             data['copy'][of] = f
                         elif of in mb:
                             data['copy'][f] = of
                         elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                             # replace the forward fullcopy entry recorded above with
                             # the reversed one
                             data['copy'][of] = f
                             del data['fullcopy'][f]
                             data['fullcopy'][of] = f
                         else: # divergence w.r.t. graft CA on one side of topological CA
                             # record the divergence under the first seen name that
                             # exists in the base manifest
                             for sf in seen:
                                 if sf in mb:
                                     assert sf not in data['diverge']
                                     data['diverge'][sf] = [f, of]
                                     break
                         return
                 # reached only when the walk above ended without an early return;
                 # 'of' is the last ancestor path visited
                 if of in mta:
                     if backwards or remotebase:
                         data['incomplete'][of] = f
                     else:
                         # only the first seen name found in the base manifest is
                         # recorded, then the function returns
                         for sf in seen:
                             if sf in mb:
                                 if tca == base:
                                     data['diverge'].setdefault(sf, []).append(f)
                                 else:
                                     data['incompletediverge'][sf] = [of, f]
                                 return
             def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
                 """Replay in the dirstate the copies made between fromrev and rev.

                 When skiprev is given, it names a revision used to filter copy
                 records: copies that happened between fromrev and skiprev are not
                 duplicated, even when they also appear among the copies between
                 fromrev and rev.
                 """
                 ctraceconfig = repo.ui.config('experimental', 'copytrace')
                 bctrace = stringutil.parsebool(ctraceconfig)
                 # tracing counts as enabled for 'heuristics', for a true boolean
                 # value, and for a value parsebool() maps to None
                 tracingenabled = (ctraceconfig == 'heuristics' or bctrace
                                   or bctrace is None)

                 if skiprev is None or not tracingenabled:
                     exclude = {}
                 else:
                     # copytrace='off' skips this computation, but not the entire
                     # function: this call is O(size of the repo) during a rebase,
                     # while the rest of the function is much faster (and is required
                     # for carrying copy metadata across the rebase anyway).
                     exclude = pathcopies(repo[fromrev], repo[skiprev])

                 allcopies = pathcopies(repo[fromrev], repo[rev])
                 for dst, src in allcopies.iteritems():
                     # copies.pathcopies returns backward renames, so dst might not
                     # actually be in the dirstate
                     if dst not in exclude:
                         wctx[dst].markcopied(src)