upstream/mercurial-mirror Commit - r34080:b4b19609

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import heapq

10

import heapq

11

12

from . import (

12

from . import (

13

match as matchmod,

13

match as matchmod,

14

node,

14

node,

15

pathutil,

15

pathutil,

16

scmutil,

16

scmutil,

17

util,

17

util,

18

)

18

)

19

20

def _findlimit(repo, a, b):

20

def _findlimit(repo, a, b):

21

"""

21

"""

22

Find the last revision that needs to be checked to ensure that a full

22

Find the last revision that needs to be checked to ensure that a full

23

transitive closure for file copies can be properly calculated.

23

transitive closure for file copies can be properly calculated.

24

Generally, this means finding the earliest revision number that's an

24

Generally, this means finding the earliest revision number that's an

25

ancestor of a or b but not both, except when a or b is a direct descendent

25

ancestor of a or b but not both, except when a or b is a direct descendent

26

of the other, in which case we can return the minimum revnum of a and b.

26

of the other, in which case we can return the minimum revnum of a and b.

27

None if no such revision exists.

27

None if no such revision exists.

28

"""

28

"""

29

30

# basic idea:

30

# basic idea:

31

# - mark a and b with different sides

31

# - mark a and b with different sides

32

# - if a parent's children are all on the same side, the parent is

32

# - if a parent's children are all on the same side, the parent is

33

# on that side, otherwise it is on no side

33

# on that side, otherwise it is on no side

34

# - walk the graph in topological order with the help of a heap;

34

# - walk the graph in topological order with the help of a heap;

35

# - add unseen parents to side map

35

# - add unseen parents to side map

36

# - clear side of any parent that has children on different sides

36

# - clear side of any parent that has children on different sides

37

# - track number of interesting revs that might still be on a side

37

# - track number of interesting revs that might still be on a side

38

# - track the lowest interesting rev seen

38

# - track the lowest interesting rev seen

39

# - quit when interesting revs is zero

39

# - quit when interesting revs is zero

40

41

cl = repo.changelog

41

cl = repo.changelog

42

working = len(cl) # pseudo rev for the working directory

42

working = len(cl) # pseudo rev for the working directory

43

if a is None:

43

if a is None:

44

a = working

44

a = working

45

if b is None:

45

if b is None:

46

b = working

46

b = working

47

48

side = {a: -1, b: 1}

48

side = {a: -1, b: 1}

49

visit = [-a, -b]

49

visit = [-a, -b]

50

heapq.heapify(visit)

50

heapq.heapify(visit)

51

interesting = len(visit)

51

interesting = len(visit)

52

hascommonancestor = False

52

hascommonancestor = False

53

limit = working

53

limit = working

54

55

while interesting:

55

while interesting:

56

r = -heapq.heappop(visit)

56

r = -heapq.heappop(visit)

57

if r == working:

57

if r == working:

58

parents = [cl.rev(p) for p in repo.dirstate.parents()]

58

parents = [cl.rev(p) for p in repo.dirstate.parents()]

59

else:

59

else:

60

parents = cl.parentrevs(r)

60

parents = cl.parentrevs(r)

61

for p in parents:

61

for p in parents:

62

if p < 0:

62

if p < 0:

63

continue

63

continue

64

if p not in side:

64

if p not in side:

65

# first time we see p; add it to visit

65

# first time we see p; add it to visit

66

side[p] = side[r]

66

side[p] = side[r]

67

if side[p]:

67

if side[p]:

68

interesting += 1

68

interesting += 1

69

heapq.heappush(visit, -p)

69

heapq.heappush(visit, -p)

70

elif side[p] and side[p] != side[r]:

70

elif side[p] and side[p] != side[r]:

71

# p was interesting but now we know better

71

# p was interesting but now we know better

72

side[p] = 0

72

side[p] = 0

73

interesting -= 1

73

interesting -= 1

74

hascommonancestor = True

74

hascommonancestor = True

75

if side[r]:

75

if side[r]:

76

limit = r # lowest rev visited

76

limit = r # lowest rev visited

77

interesting -= 1

77

interesting -= 1

78

79

if not hascommonancestor:

79

if not hascommonancestor:

80

return None

80

return None

81

82

# Consider the following flow (see test-commit-amend.t under issue4405):

82

# Consider the following flow (see test-commit-amend.t under issue4405):

83

# 1/ File 'a0' committed

83

# 1/ File 'a0' committed

84

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

84

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

85

# 3/ Move back to first commit

85

# 3/ Move back to first commit

86

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

86

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

87

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

87

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

88

#

88

#

89

# During the amend in step five, we will be in this state:

89

# During the amend in step five, we will be in this state:

90

#

90

#

91

# @ 3 temporary amend commit for a1-amend

91

# @ 3 temporary amend commit for a1-amend

92

# |

92

# |

93

# o 2 a1-amend

93

# o 2 a1-amend

94

# |

94

# |

95

# | o 1 a1

95

# | o 1 a1

96

# |/

96

# |/

97

# o 0 a0

97

# o 0 a0

98

#

98

#

99

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

99

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

100

# yet the filelog has the copy information in rev 1 and we will not look

100

# yet the filelog has the copy information in rev 1 and we will not look

101

# back far enough unless we also look at the a and b as candidates.

101

# back far enough unless we also look at the a and b as candidates.

102

# This only occurs when a is a descendent of b or visa-versa.

102

# This only occurs when a is a descendent of b or visa-versa.

103

return min(limit, a, b)

103

return min(limit, a, b)

104

105

def _chain(src, dst, a, b):

105

def _chain(src, dst, a, b):

106

'''chain two sets of copies a->b'''

106

'''chain two sets of copies a->b'''

107

t = a.copy()

107

t = a.copy()

108

for k, v in b.iteritems():

108

for k, v in b.iteritems():

109

if v in t:

109

if v in t:

110

# found a chain

110

# found a chain

111

if t[v] != k:

111

if t[v] != k:

112

# file wasn't renamed back to itself

112

# file wasn't renamed back to itself

113

t[k] = t[v]

113

t[k] = t[v]

114

if v not in dst:

114

if v not in dst:

115

# chain was a rename, not a copy

115

# chain was a rename, not a copy

116

del t[v]

116

del t[v]

117

if v in src:

117

if v in src:

118

# file is a copy of an existing file

118

# file is a copy of an existing file

119

t[k] = v

119

t[k] = v

120

121

# remove criss-crossed copies

121

# remove criss-crossed copies

122

for k, v in t.items():

122

for k, v in t.items():

123

if k in src and v in dst:

123

if k in src and v in dst:

124

del t[k]

124

del t[k]

125

126

return t

126

return t

127

128

def _tracefile(fctx, am, limit=-1):

128

def _tracefile(fctx, am, limit=-1):

129

'''return file context that is the ancestor of fctx present in ancestor

129

'''return file context that is the ancestor of fctx present in ancestor

130

manifest am, stopping after the first ancestor lower than limit'''

130

manifest am, stopping after the first ancestor lower than limit'''

131

132

for f in fctx.ancestors():

132

for f in fctx.ancestors():

133

if am.get(f.path(), None) == f.filenode():

133

if am.get(f.path(), None) == f.filenode():

134

return f

134

return f

135

if limit >= 0 and f.linkrev() < limit and f.rev() < limit:

135

if limit >= 0 and f.linkrev() < limit and f.rev() < limit:

136

return None

136

return None

137

138

def _dirstatecopies(d):

138

def _dirstatecopies(d):

139

ds = d._repo.dirstate

139

ds = d._repo.dirstate

140

c = ds.copies().copy()

140

c = ds.copies().copy()

141

for k in c.keys():

141

for k in c.keys():

142

if ds[k] not in 'anm':

142

if ds[k] not in 'anm':

143

del c[k]

143

del c[k]

144

return c

144

return c

145

146

def _computeforwardmissing(a, b, match=None):

146

def _computeforwardmissing(a, b, match=None):

147

"""Computes which files are in b but not a.

147

"""Computes which files are in b but not a.

148

This is its own function so extensions can easily wrap this call to see what

148

This is its own function so extensions can easily wrap this call to see what

149

files _forwardcopies is about to process.

149

files _forwardcopies is about to process.

150

"""

150

"""

151

ma = a.manifest()

151

ma = a.manifest()

152

mb = b.manifest()

152

mb = b.manifest()

153

return mb.filesnotin(ma, match=match)

153

return mb.filesnotin(ma, match=match)

154

155

def _forwardcopies(a, b, match=None):

155

def _forwardcopies(a, b, match=None):

156

'''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

156

'''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

157

158

# check for working copy

158

# check for working copy

159

w = None

159

w = None

160

if b.rev() is None:

160

if b.rev() is None:

161

w = b

161

w = b

162

b = w.p1()

162

b = w.p1()

163

if a == b:

163

if a == b:

164

# short-circuit to avoid issues with merge states

164

# short-circuit to avoid issues with merge states

165

return _dirstatecopies(w)

165

return _dirstatecopies(w)

166

167

# files might have to be traced back to the fctx parent of the last

167

# files might have to be traced back to the fctx parent of the last

168

# one-side-only changeset, but not further back than that

168

# one-side-only changeset, but not further back than that

169

limit = _findlimit(a._repo, a.rev(), b.rev())

169

limit = _findlimit(a._repo, a.rev(), b.rev())

170

if limit is None:

170

if limit is None:

171

limit = -1

171

limit = -1

172

am = a.manifest()

172

am = a.manifest()

173

174

# find where new files came from

174

# find where new files came from

175

# we currently don't try to find where old files went, too expensive

175

# we currently don't try to find where old files went, too expensive

176

# this means we can miss a case like 'hg rm b; hg cp a b'

176

# this means we can miss a case like 'hg rm b; hg cp a b'

177

cm = {}

177

cm = {}

178

179

# Computing the forward missing is quite expensive on large manifests, since

179

# Computing the forward missing is quite expensive on large manifests, since

180

# it compares the entire manifests. We can optimize it in the common use

180

# it compares the entire manifests. We can optimize it in the common use

181

# case of computing what copies are in a commit versus its parent (like

181

# case of computing what copies are in a commit versus its parent (like

182

# during a rebase or histedit). Note, we exclude merge commits from this

182

# during a rebase or histedit). Note, we exclude merge commits from this

183

# optimization, since the ctx.files() for a merge commit is not correct for

183

# optimization, since the ctx.files() for a merge commit is not correct for

184

# this comparison.

184

# this comparison.

185

forwardmissingmatch = match

185

forwardmissingmatch = match

186

if b.p1() == a and b.p2().node() == node.nullid:

186

if b.p1() == a and b.p2().node() == node.nullid:

187

filesmatcher = scmutil.matchfiles(a._repo, b.files())

187

filesmatcher = scmutil.matchfiles(a._repo, b.files())

188

forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)

188

forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)

189

missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

189

missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

190

191

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

191

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

192

for f in missing:

192

for f in missing:

193

fctx = b[f]

193

fctx = b[f]

194

fctx._ancestrycontext = ancestrycontext

194

fctx._ancestrycontext = ancestrycontext

195

ofctx = _tracefile(fctx, am, limit)

195

ofctx = _tracefile(fctx, am, limit)

196

if ofctx:

196

if ofctx:

197

cm[f] = ofctx.path()

197

cm[f] = ofctx.path()

198

199

# combine copies from dirstate if necessary

199

# combine copies from dirstate if necessary

200

if w is not None:

200

if w is not None:

201

cm = _chain(a, w, cm, _dirstatecopies(w))

201

cm = _chain(a, w, cm, _dirstatecopies(w))

202

203

return cm

203

return cm

204

205

def _backwardrenames(a, b):

205

def _backwardrenames(a, b):

206

if a._repo.ui.config('experimental', 'copytrace') == 'off':

206

if a._repo.ui.config('experimental', 'copytrace') == 'off':

207

return {}

207

return {}

208

209

# Even though we're not taking copies into account, 1:n rename situations

209

# Even though we're not taking copies into account, 1:n rename situations

210

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

210

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

211

# arbitrarily pick one of the renames.

211

# arbitrarily pick one of the renames.

212

f = _forwardcopies(b, a)

212

f = _forwardcopies(b, a)

213

r = {}

213

r = {}

214

for k, v in sorted(f.iteritems()):

214

for k, v in sorted(f.iteritems()):

215

# remove copies

215

# remove copies

216

if v in a:

216

if v in a:

217

continue

217

continue

218

r[v] = k

218

r[v] = k

219

return r

219

return r

220

221

def pathcopies(x, y, match=None):

221

def pathcopies(x, y, match=None):

222

'''find {dst@y: src@x} copy mapping for directed compare'''

222

'''find {dst@y: src@x} copy mapping for directed compare'''

223

if x == y or not x or not y:

223

if x == y or not x or not y:

224

return {}

224

return {}

225

a = y.ancestor(x)

225

a = y.ancestor(x)

226

if a == x:

226

if a == x:

227

return _forwardcopies(x, y, match=match)

227

return _forwardcopies(x, y, match=match)

228

if a == y:

228

if a == y:

229

return _backwardrenames(x, y)

229

return _backwardrenames(x, y)

230

return _chain(x, y, _backwardrenames(x, a),

230

return _chain(x, y, _backwardrenames(x, a),

231

_forwardcopies(a, y, match=match))

231

_forwardcopies(a, y, match=match))

232

233

def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):

233

def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):

234

"""Computes, based on addedinm1 and addedinm2, the files exclusive to c1

234

"""Computes, based on addedinm1 and addedinm2, the files exclusive to c1

235

and c2. This is its own function so extensions can easily wrap this call

235

and c2. This is its own function so extensions can easily wrap this call

236

to see what files mergecopies is about to process.

236

to see what files mergecopies is about to process.

237

238

Even though c1 and c2 are not used in this function, they are useful in

238

Even though c1 and c2 are not used in this function, they are useful in

239

other extensions for being able to read the file nodes of the changed files.

239

other extensions for being able to read the file nodes of the changed files.

240

241

"baselabel" can be passed to help distinguish the multiple computations

241

"baselabel" can be passed to help distinguish the multiple computations

242

done in the graft case.

242

done in the graft case.

243

"""

243

"""

244

u1 = sorted(addedinm1 - addedinm2)

244

u1 = sorted(addedinm1 - addedinm2)

245

u2 = sorted(addedinm2 - addedinm1)

245

u2 = sorted(addedinm2 - addedinm1)

246

247

header = " unmatched files in %s"

247

header = " unmatched files in %s"

248

if baselabel:

248

if baselabel:

249

header += ' (from %s)' % baselabel

249

header += ' (from %s)' % baselabel

250

if u1:

250

if u1:

251

repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))

251

repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))

252

if u2:

252

if u2:

253

repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))

253

repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))

254

return u1, u2

254

return u1, u2

255

256

def _makegetfctx(ctx):

256

def _makegetfctx(ctx):

257

"""return a 'getfctx' function suitable for _checkcopies usage

257

"""return a 'getfctx' function suitable for _checkcopies usage

258

259

We have to re-setup the function building 'filectx' for each

259

We have to re-setup the function building 'filectx' for each

260

'_checkcopies' to ensure the linkrev adjustment is properly setup for

260

'_checkcopies' to ensure the linkrev adjustment is properly setup for

261

each. Linkrev adjustment is important to avoid bug in rename

261

each. Linkrev adjustment is important to avoid bug in rename

262

detection. Moreover, having a proper '_ancestrycontext' setup ensures

262

detection. Moreover, having a proper '_ancestrycontext' setup ensures

263

the performance impact of this adjustment is kept limited. Without it,

263

the performance impact of this adjustment is kept limited. Without it,

264

each file could do a full dag traversal making the time complexity of

264

each file could do a full dag traversal making the time complexity of

265

the operation explode (see issue4537).

265

the operation explode (see issue4537).

266

267

This function exists here mostly to limit the impact on stable. Feel

267

This function exists here mostly to limit the impact on stable. Feel

268

free to refactor on default.

268

free to refactor on default.

269

"""

269

"""

270

rev = ctx.rev()

270

rev = ctx.rev()

271

repo = ctx._repo

271

repo = ctx._repo

272

ac = getattr(ctx, '_ancestrycontext', None)

272

ac = getattr(ctx, '_ancestrycontext', None)

273

if ac is None:

273

if ac is None:

274

revs = [rev]

274

revs = [rev]

275

if rev is None:

275

if rev is None:

276

revs = [p.rev() for p in ctx.parents()]

276

revs = [p.rev() for p in ctx.parents()]

277

ac = repo.changelog.ancestors(revs, inclusive=True)

277

ac = repo.changelog.ancestors(revs, inclusive=True)

278

ctx._ancestrycontext = ac

278

ctx._ancestrycontext = ac

279

def makectx(f, n):

279

def makectx(f, n):

280

if n in node.wdirnodes: # in a working context?

280

if n in node.wdirnodes: # in a working context?

281

if ctx.rev() is None:

281

if ctx.rev() is None:

282

return ctx.filectx(f)

282

return ctx.filectx(f)

283

return repo[None][f]

283

return repo[None][f]

284

fctx = repo.filectx(f, fileid=n)

284

fctx = repo.filectx(f, fileid=n)

285

# setup only needed for filectx not create from a changectx

285

# setup only needed for filectx not create from a changectx

286

fctx._ancestrycontext = ac

286

fctx._ancestrycontext = ac

287

fctx._descendantrev = rev

287

fctx._descendantrev = rev

288

return fctx

288

return fctx

289

return util.lrucachefunc(makectx)

289

return util.lrucachefunc(makectx)

290

291

def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):

291

def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):

292

"""combine partial copy paths"""

292

"""combine partial copy paths"""

293

remainder = {}

293

remainder = {}

294

for f in copyfrom:

294

for f in copyfrom:

295

if f in copyto:

295

if f in copyto:

296

finalcopy[copyto[f]] = copyfrom[f]

296

finalcopy[copyto[f]] = copyfrom[f]

297

del copyto[f]

297

del copyto[f]

298

for f in incompletediverge:

298

for f in incompletediverge:

299

assert f not in diverge

299

assert f not in diverge

300

ic = incompletediverge[f]

300

ic = incompletediverge[f]

301

if ic[0] in copyto:

301

if ic[0] in copyto:

302

diverge[f] = [copyto[ic[0]], ic[1]]

302

diverge[f] = [copyto[ic[0]], ic[1]]

303

else:

303

else:

304

remainder[f] = ic

304

remainder[f] = ic

305

return remainder

305

return remainder

306

307

def mergecopies(repo, c1, c2, base):

307

def mergecopies(repo, c1, c2, base):

308

"""

308

"""

309

The basic algorithm for copytracing. Copytracing is used in commands like

309

The function calling different copytracing algorithms on the basis of config

310

rebase, merge, unshelve, etc to merge files that were moved/ copied in one

310

which find moves and copies between context c1 and c2 that are relevant for

311

merge parent and modified in another. For example:

311

merging. 'base' will be used as the merge base.

312

313

Copytracing is used in commands like rebase, merge, unshelve, etc to merge

314

files that were moved/ copied in one merge parent and modified in another.

315

For example:

312

316

313

o ---> 4 another commit

317

o ---> 4 another commit

314

|

318

|

315

| o ---> 3 commit that modifies a.txt

319

| o ---> 3 commit that modifies a.txt

316

| /

320

| /

317

o / ---> 2 commit that moves a.txt to b.txt

321

o / ---> 2 commit that moves a.txt to b.txt

318

|/

322

|/

319

o ---> 1 merge base

323

o ---> 1 merge base

320

324

321

If we try to rebase revision 3 on revision 4, since there is no a.txt in

325

If we try to rebase revision 3 on revision 4, since there is no a.txt in

322

revision 4, and if user have copytrace disabled, we prints the following

326

revision 4, and if user have copytrace disabled, we prints the following

323

message:

327

message:

324

328

325

```other changed <file> which local deleted```

329

```other changed <file> which local deleted```

326

330

327

If copytrace is enabled, this function finds all the new files that were

328

added from merge base up to the top commit (here 4), and for each file it

329

checks if this file was copied from another file (a.txt in the above case).

330

331

Find moves and copies between context c1 and c2 that are relevant

332

for merging. 'base' will be used as the merge base.

333

334

Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and

331

Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and

335

"dirmove".

332

"dirmove".

336

333

337

"copy" is a mapping from destination name -> source name,

334

"copy" is a mapping from destination name -> source name,

338

where source is in c1 and destination is in c2 or vice-versa.

335

where source is in c1 and destination is in c2 or vice-versa.

339

336

340

"movewithdir" is a mapping from source name -> destination name,

337

"movewithdir" is a mapping from source name -> destination name,

341

where the file at source present in one context but not the other

338

where the file at source present in one context but not the other

342

needs to be moved to destination by the merge process, because the

339

needs to be moved to destination by the merge process, because the

343

other context moved the directory it is in.

340

other context moved the directory it is in.

344

341

345

"diverge" is a mapping of source name -> list of destination names

342

"diverge" is a mapping of source name -> list of destination names

346

for divergent renames.

343

for divergent renames.

347

344

348

"renamedelete" is a mapping of source name -> list of destination

345

"renamedelete" is a mapping of source name -> list of destination

349

names for files deleted in c1 that were renamed in c2 or vice-versa.

346

names for files deleted in c1 that were renamed in c2 or vice-versa.

350

347

351

"dirmove" is a mapping of detected source dir -> destination dir renames.

348

"dirmove" is a mapping of detected source dir -> destination dir renames.

352

This is needed for handling changes to new files previously grafted into

349

This is needed for handling changes to new files previously grafted into

353

renamed directories.

350

renamed directories.

354

"""

351

"""

355

# avoid silly behavior for update from empty dir

352

# avoid silly behavior for update from empty dir

356

if not c1 or not c2 or c1 == c2:

353

if not c1 or not c2 or c1 == c2:

357

return {}, {}, {}, {}, {}

354

return {}, {}, {}, {}, {}

358

355

359

# avoid silly behavior for parent -> working dir

356

# avoid silly behavior for parent -> working dir

360

if c2.node() is None and c1.node() == repo.dirstate.p1():

357

if c2.node() is None and c1.node() == repo.dirstate.p1():

361

return repo.dirstate.copies(), {}, {}, {}, {}

358

return repo.dirstate.copies(), {}, {}, {}, {}

362

359

360

copytracing = repo.ui.config('experimental', 'copytrace')

361

363

# Copy trace disabling is explicitly below the node == p1 logic above

362

# Copy trace disabling is explicitly below the node == p1 logic above

364

# because the logic above is required for a simple copy to be kept across a

363

# because the logic above is required for a simple copy to be kept across a

365

# rebase.

364

# rebase.

366

if repo.ui.config('experimental', 'copytrace') == 'off':

365

if copytracing == 'off':

367

return {}, {}, {}, {}, {}

366

return {}, {}, {}, {}, {}

367

else:

368

return _fullcopytracing(repo, c1, c2, base)

368

369

370

def _fullcopytracing(repo, c1, c2, base):

371

""" The full copytracing algorithm which finds all the new files that were

372

added from merge base up to the top commit and for each file it checks if

373

this file was copied from another file.

374

375

This is pretty slow when a lot of changesets are involved but will track all

376

the copies.

377

"""

369

# In certain scenarios (e.g. graft, update or rebase), base can be

378

# In certain scenarios (e.g. graft, update or rebase), base can be

370

# overridden We still need to know a real common ancestor in this case We

379

# overridden We still need to know a real common ancestor in this case We

371

# can't just compute _c1.ancestor(_c2) and compare it to ca, because there

380

# can't just compute _c1.ancestor(_c2) and compare it to ca, because there

372

# can be multiple common ancestors, e.g. in case of bidmerge. Because our

381

# can be multiple common ancestors, e.g. in case of bidmerge. Because our

373

# caller may not know if the revision passed in lieu of the CA is a genuine

382

# caller may not know if the revision passed in lieu of the CA is a genuine

374

# common ancestor or not without explicitly checking it, it's better to

383

# common ancestor or not without explicitly checking it, it's better to

375

# determine that here.

384

# determine that here.

376

#

385

#

377

# base.descendant(wc) and base.descendant(base) are False, work around that

386

# base.descendant(wc) and base.descendant(base) are False, work around that

378

_c1 = c1.p1() if c1.rev() is None else c1

387

_c1 = c1.p1() if c1.rev() is None else c1

379

_c2 = c2.p1() if c2.rev() is None else c2

388

_c2 = c2.p1() if c2.rev() is None else c2

380

# an endpoint is "dirty" if it isn't a descendant of the merge base

389

# an endpoint is "dirty" if it isn't a descendant of the merge base

381

# if we have a dirty endpoint, we need to trigger graft logic, and also

390

# if we have a dirty endpoint, we need to trigger graft logic, and also

382

# keep track of which endpoint is dirty

391

# keep track of which endpoint is dirty

383

dirtyc1 = not (base == _c1 or base.descendant(_c1))

392

dirtyc1 = not (base == _c1 or base.descendant(_c1))

384

dirtyc2 = not (base == _c2 or base.descendant(_c2))

393

dirtyc2 = not (base == _c2 or base.descendant(_c2))

385

graft = dirtyc1 or dirtyc2

394

graft = dirtyc1 or dirtyc2

386

tca = base

395

tca = base

387

if graft:

396

if graft:

388

tca = _c1.ancestor(_c2)

397

tca = _c1.ancestor(_c2)

389

398

390

limit = _findlimit(repo, c1.rev(), c2.rev())

399

limit = _findlimit(repo, c1.rev(), c2.rev())

391

if limit is None:

400

if limit is None:

392

# no common ancestor, no copies

401

# no common ancestor, no copies

393

return {}, {}, {}, {}, {}

402

return {}, {}, {}, {}, {}

394

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

403

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

395

404

396

m1 = c1.manifest()

405

m1 = c1.manifest()

397

m2 = c2.manifest()

406

m2 = c2.manifest()

398

mb = base.manifest()

407

mb = base.manifest()

399

408

400

# gather data from _checkcopies:

409

# gather data from _checkcopies:

401

# - diverge = record all diverges in this dict

410

# - diverge = record all diverges in this dict

402

# - copy = record all non-divergent copies in this dict

411

# - copy = record all non-divergent copies in this dict

403

# - fullcopy = record all copies in this dict

412

# - fullcopy = record all copies in this dict

404

# - incomplete = record non-divergent partial copies here

413

# - incomplete = record non-divergent partial copies here

405

# - incompletediverge = record divergent partial copies here

414

# - incompletediverge = record divergent partial copies here

406

diverge = {} # divergence data is shared

415

diverge = {} # divergence data is shared

407

incompletediverge = {}

416

incompletediverge = {}

408

data1 = {'copy': {},

417

data1 = {'copy': {},

409

'fullcopy': {},

418

'fullcopy': {},

410

'incomplete': {},

419

'incomplete': {},

411

'diverge': diverge,

420

'diverge': diverge,

412

'incompletediverge': incompletediverge,

421

'incompletediverge': incompletediverge,

413

}

422

}

414

data2 = {'copy': {},

423

data2 = {'copy': {},

415

'fullcopy': {},

424

'fullcopy': {},

416

'incomplete': {},

425

'incomplete': {},

417

'diverge': diverge,

426

'diverge': diverge,

418

'incompletediverge': incompletediverge,

427

'incompletediverge': incompletediverge,

419

}

428

}

420

429

421

# find interesting file sets from manifests

430

# find interesting file sets from manifests

422

addedinm1 = m1.filesnotin(mb)

431

addedinm1 = m1.filesnotin(mb)

423

addedinm2 = m2.filesnotin(mb)

432

addedinm2 = m2.filesnotin(mb)

424

bothnew = sorted(addedinm1 & addedinm2)

433

bothnew = sorted(addedinm1 & addedinm2)

425

if tca == base:

434

if tca == base:

426

# unmatched file from base

435

# unmatched file from base

427

u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)

436

u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)

428

u1u, u2u = u1r, u2r

437

u1u, u2u = u1r, u2r

429

else:

438

else:

430

# unmatched file from base (DAG rotation in the graft case)

439

# unmatched file from base (DAG rotation in the graft case)

431

u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,

440

u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,

432

baselabel='base')

441

baselabel='base')

433

# unmatched file from topological common ancestors (no DAG rotation)

442

# unmatched file from topological common ancestors (no DAG rotation)

434

# need to recompute this for directory move handling when grafting

443

# need to recompute this for directory move handling when grafting

435

mta = tca.manifest()

444

mta = tca.manifest()

436

u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),

445

u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),

437

m2.filesnotin(mta),

446

m2.filesnotin(mta),

438

baselabel='topological common ancestor')

447

baselabel='topological common ancestor')

439

448

440

for f in u1u:

449

for f in u1u:

441

_checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

450

_checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

442

451

443

for f in u2u:

452

for f in u2u:

444

_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

453

_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

445

454

446

copy = dict(data1['copy'])

455

copy = dict(data1['copy'])

447

copy.update(data2['copy'])

456

copy.update(data2['copy'])

448

fullcopy = dict(data1['fullcopy'])

457

fullcopy = dict(data1['fullcopy'])

449

fullcopy.update(data2['fullcopy'])

458

fullcopy.update(data2['fullcopy'])

450

459

451

if dirtyc1:

460

if dirtyc1:

452

_combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,

461

_combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,

453

incompletediverge)

462

incompletediverge)

454

else:

463

else:

455

_combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,

464

_combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,

456

incompletediverge)

465

incompletediverge)

457

466

458

renamedelete = {}

467

renamedelete = {}

459

renamedeleteset = set()

468

renamedeleteset = set()

460

divergeset = set()

469

divergeset = set()

461

for of, fl in diverge.items():

470

for of, fl in diverge.items():

462

if len(fl) == 1 or of in c1 or of in c2:

471

if len(fl) == 1 or of in c1 or of in c2:

463

del diverge[of] # not actually divergent, or not a rename

472

del diverge[of] # not actually divergent, or not a rename

464

if of not in c1 and of not in c2:

473

if of not in c1 and of not in c2:

465

# renamed on one side, deleted on the other side, but filter

474

# renamed on one side, deleted on the other side, but filter

466

# out files that have been renamed and then deleted

475

# out files that have been renamed and then deleted

467

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

476

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

468

renamedeleteset.update(fl) # reverse map for below

477

renamedeleteset.update(fl) # reverse map for below

469

else:

478

else:

470

divergeset.update(fl) # reverse map for below

479

divergeset.update(fl) # reverse map for below

471

480

472

if bothnew:

481

if bothnew:

473

repo.ui.debug(" unmatched files new in both:\n %s\n"

482

repo.ui.debug(" unmatched files new in both:\n %s\n"

474

% "\n ".join(bothnew))

483

% "\n ".join(bothnew))

475

bothdiverge = {}

484

bothdiverge = {}

476

bothincompletediverge = {}

485

bothincompletediverge = {}

477

remainder = {}

486

remainder = {}

478

both1 = {'copy': {},

487

both1 = {'copy': {},

479

'fullcopy': {},

488

'fullcopy': {},

480

'incomplete': {},

489

'incomplete': {},

481

'diverge': bothdiverge,

490

'diverge': bothdiverge,

482

'incompletediverge': bothincompletediverge

491

'incompletediverge': bothincompletediverge

483

}

492

}

484

both2 = {'copy': {},

493

both2 = {'copy': {},

485

'fullcopy': {},

494

'fullcopy': {},

486

'incomplete': {},

495

'incomplete': {},

487

'diverge': bothdiverge,

496

'diverge': bothdiverge,

488

'incompletediverge': bothincompletediverge

497

'incompletediverge': bothincompletediverge

489

}

498

}

490

for f in bothnew:

499

for f in bothnew:

491

_checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)

500

_checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)

492

_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)

501

_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)

493

if dirtyc1:

502

if dirtyc1:

494

# incomplete copies may only be found on the "dirty" side for bothnew

503

# incomplete copies may only be found on the "dirty" side for bothnew

495

assert not both2['incomplete']

504

assert not both2['incomplete']

496

remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,

505

remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,

497

bothincompletediverge)

506

bothincompletediverge)

498

elif dirtyc2:

507

elif dirtyc2:

499

assert not both1['incomplete']

508

assert not both1['incomplete']

500

remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,

509

remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,

501

bothincompletediverge)

510

bothincompletediverge)

502

else:

511

else:

503

# incomplete copies and divergences can't happen outside grafts

512

# incomplete copies and divergences can't happen outside grafts

504

assert not both1['incomplete']

513

assert not both1['incomplete']

505

assert not both2['incomplete']

514

assert not both2['incomplete']

506

assert not bothincompletediverge

515

assert not bothincompletediverge

507

for f in remainder:

516

for f in remainder:

508

assert f not in bothdiverge

517

assert f not in bothdiverge

509

ic = remainder[f]

518

ic = remainder[f]

510

if ic[0] in (m1 if dirtyc1 else m2):

519

if ic[0] in (m1 if dirtyc1 else m2):

511

# backed-out rename on one side, but watch out for deleted files

520

# backed-out rename on one side, but watch out for deleted files

512

bothdiverge[f] = ic

521

bothdiverge[f] = ic

513

for of, fl in bothdiverge.items():

522

for of, fl in bothdiverge.items():

514

if len(fl) == 2 and fl[0] == fl[1]:

523

if len(fl) == 2 and fl[0] == fl[1]:

515

copy[fl[0]] = of # not actually divergent, just matching renames

524

copy[fl[0]] = of # not actually divergent, just matching renames

516

525

517

if fullcopy and repo.ui.debugflag:

526

if fullcopy and repo.ui.debugflag:

518

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

527

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

519

"% = renamed and deleted):\n")

528

"% = renamed and deleted):\n")

520

for f in sorted(fullcopy):

529

for f in sorted(fullcopy):

521

note = ""

530

note = ""

522

if f in copy:

531

if f in copy:

523

note += "*"

532

note += "*"

524

if f in divergeset:

533

if f in divergeset:

525

note += "!"

534

note += "!"

526

if f in renamedeleteset:

535

if f in renamedeleteset:

527

note += "%"

536

note += "%"

528

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

537

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

529

note))

538

note))

530

del divergeset

539

del divergeset

531

540

532

if not fullcopy:

541

if not fullcopy:

533

return copy, {}, diverge, renamedelete, {}

542

return copy, {}, diverge, renamedelete, {}

534

543

535

repo.ui.debug(" checking for directory renames\n")

544

repo.ui.debug(" checking for directory renames\n")

536

545

537

# generate a directory move map

546

# generate a directory move map

538

d1, d2 = c1.dirs(), c2.dirs()

547

d1, d2 = c1.dirs(), c2.dirs()

539

# Hack for adding '', which is not otherwise added, to d1 and d2

548

# Hack for adding '', which is not otherwise added, to d1 and d2

540

d1.addpath('/')

549

d1.addpath('/')

541

d2.addpath('/')

550

d2.addpath('/')

542

invalid = set()

551

invalid = set()

543

dirmove = {}

552

dirmove = {}

544

553

545

# examine each file copy for a potential directory move, which is

554

# examine each file copy for a potential directory move, which is

546

# when all the files in a directory are moved to a new directory

555

# when all the files in a directory are moved to a new directory

547

for dst, src in fullcopy.iteritems():

556

for dst, src in fullcopy.iteritems():

548

dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)

557

dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)

549

if dsrc in invalid:

558

if dsrc in invalid:

550

# already seen to be uninteresting

559

# already seen to be uninteresting

551

continue

560

continue

552

elif dsrc in d1 and ddst in d1:

561

elif dsrc in d1 and ddst in d1:

553

# directory wasn't entirely moved locally

562

# directory wasn't entirely moved locally

554

invalid.add(dsrc + "/")

563

invalid.add(dsrc + "/")

555

elif dsrc in d2 and ddst in d2:

564

elif dsrc in d2 and ddst in d2:

556

# directory wasn't entirely moved remotely

565

# directory wasn't entirely moved remotely

557

invalid.add(dsrc + "/")

566

invalid.add(dsrc + "/")

558

elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":

567

elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":

559

# files from the same directory moved to two different places

568

# files from the same directory moved to two different places

560

invalid.add(dsrc + "/")

569

invalid.add(dsrc + "/")

561

else:

570

else:

562

# looks good so far

571

# looks good so far

563

dirmove[dsrc + "/"] = ddst + "/"

572

dirmove[dsrc + "/"] = ddst + "/"

564

573

565

for i in invalid:

574

for i in invalid:

566

if i in dirmove:

575

if i in dirmove:

567

del dirmove[i]

576

del dirmove[i]

568

del d1, d2, invalid

577

del d1, d2, invalid

569

578

570

if not dirmove:

579

if not dirmove:

571

return copy, {}, diverge, renamedelete, {}

580

return copy, {}, diverge, renamedelete, {}

572

581

573

for d in dirmove:

582

for d in dirmove:

574

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

583

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

575

(d, dirmove[d]))

584

(d, dirmove[d]))

576

585

577

movewithdir = {}

586

movewithdir = {}

578

# check unaccounted nonoverlapping files against directory moves

587

# check unaccounted nonoverlapping files against directory moves

579

for f in u1r + u2r:

588

for f in u1r + u2r:

580

if f not in fullcopy:

589

if f not in fullcopy:

581

for d in dirmove:

590

for d in dirmove:

582

if f.startswith(d):

591

if f.startswith(d):

583

# new file added in a directory that was moved, move it

592

# new file added in a directory that was moved, move it

584

df = dirmove[d] + f[len(d):]

593

df = dirmove[d] + f[len(d):]

585

if df not in copy:

594

if df not in copy:

586

movewithdir[f] = df

595

movewithdir[f] = df

587

repo.ui.debug((" pending file src: '%s' -> "

596

repo.ui.debug((" pending file src: '%s' -> "

588

"dst: '%s'\n") % (f, df))

597

"dst: '%s'\n") % (f, df))

589

break

598

break

590

599

591

return copy, movewithdir, diverge, renamedelete, dirmove

600

return copy, movewithdir, diverge, renamedelete, dirmove

592

601

593

def _related(f1, f2, limit):

602

def _related(f1, f2, limit):

594

"""return True if f1 and f2 filectx have a common ancestor

603

"""return True if f1 and f2 filectx have a common ancestor

595

604

596

Walk back to common ancestor to see if the two files originate

605

Walk back to common ancestor to see if the two files originate

597

from the same file. Since workingfilectx's rev() is None it messes

606

from the same file. Since workingfilectx's rev() is None it messes

598

up the integer comparison logic, hence the pre-step check for

607

up the integer comparison logic, hence the pre-step check for

599

None (f1 and f2 can only be workingfilectx's initially).

608

None (f1 and f2 can only be workingfilectx's initially).

600

"""

609

"""

601

610

602

if f1 == f2:

611

if f1 == f2:

603

return f1 # a match

612

return f1 # a match

604

613

605

g1, g2 = f1.ancestors(), f2.ancestors()

614

g1, g2 = f1.ancestors(), f2.ancestors()

606

try:

615

try:

607

f1r, f2r = f1.linkrev(), f2.linkrev()

616

f1r, f2r = f1.linkrev(), f2.linkrev()

608

617

609

if f1r is None:

618

if f1r is None:

610

f1 = next(g1)

619

f1 = next(g1)

611

if f2r is None:

620

if f2r is None:

612

f2 = next(g2)

621

f2 = next(g2)

613

622

614

while True:

623

while True:

615

f1r, f2r = f1.linkrev(), f2.linkrev()

624

f1r, f2r = f1.linkrev(), f2.linkrev()

616

if f1r > f2r:

625

if f1r > f2r:

617

f1 = next(g1)

626

f1 = next(g1)

618

elif f2r > f1r:

627

elif f2r > f1r:

619

f2 = next(g2)

628

f2 = next(g2)

620

elif f1 == f2:

629

elif f1 == f2:

621

return f1 # a match

630

return f1 # a match

622

elif f1r == f2r or f1r < limit or f2r < limit:

631

elif f1r == f2r or f1r < limit or f2r < limit:

623

return False # copy no longer relevant

632

return False # copy no longer relevant

624

except StopIteration:

633

except StopIteration:

625

return False

634

return False

626

635

627

def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):

636

def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):

628

"""

637

"""

629

check possible copies of f from msrc to mdst

638

check possible copies of f from msrc to mdst

630

639

631

srcctx = starting context for f in msrc

640

srcctx = starting context for f in msrc

632

dstctx = destination context for f in mdst

641

dstctx = destination context for f in mdst

633

f = the filename to check (as in msrc)

642

f = the filename to check (as in msrc)

634

base = the changectx used as a merge base

643

base = the changectx used as a merge base

635

tca = topological common ancestor for graft-like scenarios

644

tca = topological common ancestor for graft-like scenarios

636

remotebase = True if base is outside tca::srcctx, False otherwise

645

remotebase = True if base is outside tca::srcctx, False otherwise

637

limit = the rev number to not search beyond

646

limit = the rev number to not search beyond

638

data = dictionary of dictionary to store copy data. (see mergecopies)

647

data = dictionary of dictionary to store copy data. (see mergecopies)

639

648

640

note: limit is only an optimization, and provides no guarantee that

649

note: limit is only an optimization, and provides no guarantee that

641

irrelevant revisions will not be visited

650

irrelevant revisions will not be visited

642

there is no easy way to make this algorithm stop in a guaranteed way

651

there is no easy way to make this algorithm stop in a guaranteed way

643

once it "goes behind a certain revision".

652

once it "goes behind a certain revision".

644

"""

653

"""

645

654

646

msrc = srcctx.manifest()

655

msrc = srcctx.manifest()

647

mdst = dstctx.manifest()

656

mdst = dstctx.manifest()

648

mb = base.manifest()

657

mb = base.manifest()

649

mta = tca.manifest()

658

mta = tca.manifest()

650

# Might be true if this call is about finding backward renames,

659

# Might be true if this call is about finding backward renames,

651

# This happens in the case of grafts because the DAG is then rotated.

660

# This happens in the case of grafts because the DAG is then rotated.

652

# If the file exists in both the base and the source, we are not looking

661

# If the file exists in both the base and the source, we are not looking

653

# for a rename on the source side, but on the part of the DAG that is

662

# for a rename on the source side, but on the part of the DAG that is

654

# traversed backwards.

663

# traversed backwards.

655

#

664

#

656

# In the case there is both backward and forward renames (before and after

665

# In the case there is both backward and forward renames (before and after

657

# the base) this is more complicated as we must detect a divergence.

666

# the base) this is more complicated as we must detect a divergence.

658

# We use 'backwards = False' in that case.

667

# We use 'backwards = False' in that case.

659

backwards = not remotebase and base != tca and f in mb

668

backwards = not remotebase and base != tca and f in mb

660

getsrcfctx = _makegetfctx(srcctx)

669

getsrcfctx = _makegetfctx(srcctx)

661

getdstfctx = _makegetfctx(dstctx)

670

getdstfctx = _makegetfctx(dstctx)

662

671

663

if msrc[f] == mb.get(f) and not remotebase:

672

if msrc[f] == mb.get(f) and not remotebase:

664

# Nothing to merge

673

# Nothing to merge

665

return

674

return

666

675

667

of = None

676

of = None

668

seen = {f}

677

seen = {f}

669

for oc in getsrcfctx(f, msrc[f]).ancestors():

678

for oc in getsrcfctx(f, msrc[f]).ancestors():

670

ocr = oc.linkrev()

679

ocr = oc.linkrev()

671

of = oc.path()

680

of = oc.path()

672

if of in seen:

681

if of in seen:

673

# check limit late - grab last rename before

682

# check limit late - grab last rename before

674

if ocr < limit:

683

if ocr < limit:

675

break

684

break

676

continue

685

continue

677

seen.add(of)

686

seen.add(of)

678

687

679

# remember for dir rename detection

688

# remember for dir rename detection

680

if backwards:

689

if backwards:

681

data['fullcopy'][of] = f # grafting backwards through renames

690

data['fullcopy'][of] = f # grafting backwards through renames

682

else:

691

else:

683

data['fullcopy'][f] = of

692

data['fullcopy'][f] = of

684

if of not in mdst:

693

if of not in mdst:

685

continue # no match, keep looking

694

continue # no match, keep looking

686

if mdst[of] == mb.get(of):

695

if mdst[of] == mb.get(of):

687

return # no merge needed, quit early

696

return # no merge needed, quit early

688

c2 = getdstfctx(of, mdst[of])

697

c2 = getdstfctx(of, mdst[of])

689

# c2 might be a plain new file on added on destination side that is

698

# c2 might be a plain new file on added on destination side that is

690

# unrelated to the droids we are looking for.

699

# unrelated to the droids we are looking for.

691

cr = _related(oc, c2, tca.rev())

700

cr = _related(oc, c2, tca.rev())

692

if cr and (of == f or of == c2.path()): # non-divergent

701

if cr and (of == f or of == c2.path()): # non-divergent

693

if backwards:

702

if backwards:

694

data['copy'][of] = f

703

data['copy'][of] = f

695

elif of in mb:

704

elif of in mb:

696

data['copy'][f] = of

705

data['copy'][f] = of

697

elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename

706

elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename

698

data['copy'][of] = f

707

data['copy'][of] = f

699

del data['fullcopy'][f]

708

del data['fullcopy'][f]

700

data['fullcopy'][of] = f

709

data['fullcopy'][of] = f

701

else: # divergence w.r.t. graft CA on one side of topological CA

710

else: # divergence w.r.t. graft CA on one side of topological CA

702

for sf in seen:

711

for sf in seen:

703

if sf in mb:

712

if sf in mb:

704

assert sf not in data['diverge']

713

assert sf not in data['diverge']

705

data['diverge'][sf] = [f, of]

714

data['diverge'][sf] = [f, of]

706

break

715

break

707

return

716

return

708

717

709

if of in mta:

718

if of in mta:

710

if backwards or remotebase:

719

if backwards or remotebase:

711

data['incomplete'][of] = f

720

data['incomplete'][of] = f

712

else:

721

else:

713

for sf in seen:

722

for sf in seen:

714

if sf in mb:

723

if sf in mb:

715

if tca == base:

724

if tca == base:

716

data['diverge'].setdefault(sf, []).append(f)

725

data['diverge'].setdefault(sf, []).append(f)

717

else:

726

else:

718

data['incompletediverge'][sf] = [of, f]

727

data['incompletediverge'][sf] = [of, f]

719

return

728

return

720

729

721

def duplicatecopies(repo, rev, fromrev, skiprev=None):

730

def duplicatecopies(repo, rev, fromrev, skiprev=None):

722

'''reproduce copies from fromrev to rev in the dirstate

731

'''reproduce copies from fromrev to rev in the dirstate

723

732

724

If skiprev is specified, it's a revision that should be used to

733

If skiprev is specified, it's a revision that should be used to

725

filter copy records. Any copies that occur between fromrev and

734

filter copy records. Any copies that occur between fromrev and

726

skiprev will not be duplicated, even if they appear in the set of

735

skiprev will not be duplicated, even if they appear in the set of

727

copies between fromrev and rev.

736

copies between fromrev and rev.

728

'''

737

'''

729

exclude = {}

738

exclude = {}

730

if (skiprev is not None and

739

if (skiprev is not None and

731

repo.ui.config('experimental', 'copytrace') != 'off'):

740

repo.ui.config('experimental', 'copytrace') != 'off'):

732

# copytrace='off' skips this line, but not the entire function because

741

# copytrace='off' skips this line, but not the entire function because

733

# the line below is O(size of the repo) during a rebase, while the rest

742

# the line below is O(size of the repo) during a rebase, while the rest

734

# of the function is much faster (and is required for carrying copy

743

# of the function is much faster (and is required for carrying copy

735

# metadata across the rebase anyway).

744

# metadata across the rebase anyway).

736

exclude = pathcopies(repo[fromrev], repo[skiprev])

745

exclude = pathcopies(repo[fromrev], repo[skiprev])

737

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

746

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

738

# copies.pathcopies returns backward renames, so dst might not

747

# copies.pathcopies returns backward renames, so dst might not

739

# actually be in the dirstate

748

# actually be in the dirstate

740

if dst in exclude:

749

if dst in exclude:

741

continue

750

continue

742

if repo.dirstate[dst] in "nma":

751

if repo.dirstate[dst] in "nma":

743

repo.dirstate.copy(src, dst)

752

repo.dirstate.copy(src, dst)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import heapq
             from . import (
                 match as matchmod,
                 node,
                 pathutil,
                 scmutil,
                 util,
             )
             def _findlimit(repo, a, b):
                 """
                 Find the last revision that needs to be checked to ensure that a full
                 transitive closure for file copies can be properly calculated.
                 Generally, this means finding the earliest revision number that's an
                 ancestor of a or b but not both, except when a or b is a direct descendent
                 of the other, in which case we can return the minimum revnum of a and b.
                 None if no such revision exists.
                 """
                 # basic idea:
                 # - mark a and b with different sides
                 # - if a parent's children are all on the same side, the parent is
                 #   on that side, otherwise it is on no side
                 # - walk the graph in topological order with the help of a heap;
                 #   - add unseen parents to side map
                 #   - clear side of any parent that has children on different sides
                 #   - track number of interesting revs that might still be on a side
                 #   - track the lowest interesting rev seen
                 #   - quit when interesting revs is zero
                 cl = repo.changelog
                 working = len(cl) # pseudo rev for the working directory
                 if a is None:
                     a = working
                 if b is None:
                     b = working
                 side = {a: -1, b: 1}
                 visit = [-a, -b]
                 heapq.heapify(visit)
                 interesting = len(visit)
                 hascommonancestor = False
                 limit = working
                 while interesting:
                     r = -heapq.heappop(visit)
                     if r == working:
                         parents = [cl.rev(p) for p in repo.dirstate.parents()]
                     else:
                         parents = cl.parentrevs(r)
                     for p in parents:
                         if p < 0:
                             continue
                         if p not in side:
                             # first time we see p; add it to visit
                             side[p] = side[r]
                             if side[p]:
                                 interesting += 1
                             heapq.heappush(visit, -p)
                         elif side[p] and side[p] != side[r]:
                             # p was interesting but now we know better
                             side[p] = 0
                             interesting -= 1
                             hascommonancestor = True
                     if side[r]:
                         limit = r # lowest rev visited
                         interesting -= 1
                 if not hascommonancestor:
                     return None
                 # Consider the following flow (see test-commit-amend.t under issue4405):
                 # 1/ File 'a0' committed
                 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
                 # 3/ Move back to first commit
                 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
                 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
                 #
                 # During the amend in step five, we will be in this state:
                 #
                 # @  3 temporary amend commit for a1-amend
                 # |
                 # o  2 a1-amend
                 # |
                 # | o  1 a1
                 # |/
                 # o  0 a0
                 #
                 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
                 # yet the filelog has the copy information in rev 1 and we will not look
                 # back far enough unless we also look at the a and b as candidates.
                 # This only occurs when a is a descendent of b or visa-versa.
                 return min(limit, a, b)
             def _chain(src, dst, a, b):
                 '''chain two sets of copies a->b'''
                 t = a.copy()
                 for k, v in b.iteritems():
                     if v in t:
                         # found a chain
                         if t[v] != k:
                             # file wasn't renamed back to itself
                             t[k] = t[v]
                         if v not in dst:
                             # chain was a rename, not a copy
                             del t[v]
                     if v in src:
                         # file is a copy of an existing file
                         t[k] = v
                 # remove criss-crossed copies
                 for k, v in t.items():
                     if k in src and v in dst:
                         del t[k]
                 return t
             def _tracefile(fctx, am, limit=-1):
                 '''return file context that is the ancestor of fctx present in ancestor
                 manifest am, stopping after the first ancestor lower than limit'''
                 for f in fctx.ancestors():
                     if am.get(f.path(), None) == f.filenode():
                         return f
                     if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
                         return None
             def _dirstatecopies(d):
                 ds = d._repo.dirstate
                 c = ds.copies().copy()
                 for k in c.keys():
                     if ds[k] not in 'anm':
                         del c[k]
                 return c
             def _computeforwardmissing(a, b, match=None):
                 """Computes which files are in b but not a.
                 This is its own function so extensions can easily wrap this call to see what
                 files _forwardcopies is about to process.
                 """
                 ma = a.manifest()
                 mb = b.manifest()
                 return mb.filesnotin(ma, match=match)
             def _forwardcopies(a, b, match=None):
                 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
                 # check for working copy
                 w = None
                 if b.rev() is None:
                     w = b
                     b = w.p1()
                     if a == b:
                         # short-circuit to avoid issues with merge states
                         return _dirstatecopies(w)
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 limit = _findlimit(a._repo, a.rev(), b.rev())
                 if limit is None:
                     limit = -1
                 am = a.manifest()
                 # find where new files came from
                 # we currently don't try to find where old files went, too expensive
                 # this means we can miss a case like 'hg rm b; hg cp a b'
                 cm = {}
                 # Computing the forward missing is quite expensive on large manifests, since
                 # it compares the entire manifests. We can optimize it in the common use
                 # case of computing what copies are in a commit versus its parent (like
                 # during a rebase or histedit). Note, we exclude merge commits from this
                 # optimization, since the ctx.files() for a merge commit is not correct for
                 # this comparison.
                 forwardmissingmatch = match
                 if b.p1() == a and b.p2().node() == node.nullid:
                     filesmatcher = scmutil.matchfiles(a._repo, b.files())
                     forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
                 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
                 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
                 for f in missing:
                     fctx = b[f]
                     fctx._ancestrycontext = ancestrycontext
                     ofctx = _tracefile(fctx, am, limit)
                     if ofctx:
                         cm[f] = ofctx.path()
                 # combine copies from dirstate if necessary
                 if w is not None:
                     cm = _chain(a, w, cm, _dirstatecopies(w))
                 return cm
             def _backwardrenames(a, b):
                 if a._repo.ui.config('experimental', 'copytrace') == 'off':
                     return {}
                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
                 f = _forwardcopies(b, a)
                 r = {}
                 for k, v in sorted(f.iteritems()):
                     # remove copies
                     if v in a:
                         continue
                     r[v] = k
                 return r
             def pathcopies(x, y, match=None):
                 '''find {dst@y: src@x} copy mapping for directed compare'''
                 if x == y or not x or not y:
                     return {}
                 a = y.ancestor(x)
                 if a == x:
                     return _forwardcopies(x, y, match=match)
                 if a == y:
                     return _backwardrenames(x, y)
                 return _chain(x, y, _backwardrenames(x, a),
                               _forwardcopies(a, y, match=match))
             def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
                 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
                 and c2. This is its own function so extensions can easily wrap this call
                 to see what files mergecopies is about to process.
                 Even though c1 and c2 are not used in this function, they are useful in
                 other extensions for being able to read the file nodes of the changed files.
                 "baselabel" can be passed to help distinguish the multiple computations
                 done in the graft case.
                 """
                 u1 = sorted(addedinm1 - addedinm2)
                 u2 = sorted(addedinm2 - addedinm1)
                 header = "  unmatched files in %s"
                 if baselabel:
                     header += ' (from %s)' % baselabel
                 if u1:
                     repo.ui.debug("%s:\n   %s\n" % (header % 'local', "\n   ".join(u1)))
                 if u2:
                     repo.ui.debug("%s:\n   %s\n" % (header % 'other', "\n   ".join(u2)))
                 return u1, u2
             def _makegetfctx(ctx):
                 """return a 'getfctx' function suitable for _checkcopies usage
                 We have to re-setup the function building 'filectx' for each
                 '_checkcopies' to ensure the linkrev adjustment is properly setup for
                 each. Linkrev adjustment is important to avoid bug in rename
                 detection. Moreover, having a proper '_ancestrycontext' setup ensures
                 the performance impact of this adjustment is kept limited. Without it,
                 each file could do a full dag traversal making the time complexity of
                 the operation explode (see issue4537).
                 This function exists here mostly to limit the impact on stable. Feel
                 free to refactor on default.
                 """
                 rev = ctx.rev()
                 repo = ctx._repo
                 ac = getattr(ctx, '_ancestrycontext', None)
                 if ac is None:
                     revs = [rev]
                     if rev is None:
                         revs = [p.rev() for p in ctx.parents()]
                     ac = repo.changelog.ancestors(revs, inclusive=True)
                     ctx._ancestrycontext = ac
                 def makectx(f, n):
                     if n in node.wdirnodes:  # in a working context?
                         if ctx.rev() is None:
                             return ctx.filectx(f)
                         return repo[None][f]
                     fctx = repo.filectx(f, fileid=n)
                     # setup only needed for filectx not create from a changectx
                     fctx._ancestrycontext = ac
                     fctx._descendantrev = rev
                     return fctx
                 return util.lrucachefunc(makectx)
             def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
                 """combine partial copy paths"""
                 remainder = {}
                 for f in copyfrom:
                     if f in copyto:
                         finalcopy[copyto[f]] = copyfrom[f]
                         del copyto[f]
                 for f in incompletediverge:
                     assert f not in diverge
                     ic = incompletediverge[f]
                     if ic[0] in copyto:
                         diverge[f] = [copyto[ic[0]], ic[1]]
                     else:
                         remainder[f] = ic
                 return remainder
             def mergecopies(repo, c1, c2, base):
                 """
-                The basic algorithm for copytracing. Copytracing is used in commands like
+                The function calling different copytracing algorithms on the basis of config
-                rebase, merge, unshelve, etc to merge files that were moved/ copied in one
+                which find moves and copies between context c1 and c2 that are relevant for
-                merge parent and modified in another. For example:
+                merging. 'base' will be used as the merge base.
+                Copytracing is used in commands like rebase, merge, unshelve, etc to merge
+                files that were moved/ copied in one merge parent and modified in another.
+                For example:
                 o          ---> 4 another commit
                 |
                 |   o      ---> 3 commit that modifies a.txt
                 |  /
                 o /        ---> 2 commit that moves a.txt to b.txt
                 |/
                 o          ---> 1 merge base
                 If we try to rebase revision 3 on revision 4, since there is no a.txt in
                 revision 4, and if user have copytrace disabled, we prints the following
                 message:
                 ```other changed <file> which local deleted```
-                If copytrace is enabled, this function finds all the new files that were
-                added from merge base up to the top commit (here 4), and for each file it
-                checks if this file was copied from another file (a.txt in the above case).
-                Find moves and copies between context c1 and c2 that are relevant
-                for merging. 'base' will be used as the merge base.
                 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
                 "dirmove".
                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.
                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.
                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.
                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.
                 "dirmove" is a mapping of detected source dir -> destination dir renames.
                 This is needed for handling changes to new files previously grafted into
                 renamed directories.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return {}, {}, {}, {}, {}
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return repo.dirstate.copies(), {}, {}, {}, {}
+                copytracing = repo.ui.config('experimental', 'copytrace')
                 # Copy trace disabling is explicitly below the node == p1 logic above
                 # because the logic above is required for a simple copy to be kept across a
                 # rebase.
-                if repo.ui.config('experimental', 'copytrace') == 'off':
+                if copytracing == 'off':
                     return {}, {}, {}, {}, {}
+                else:
+                    return _fullcopytracing(repo, c1, c2, base)
+            def _fullcopytracing(repo, c1, c2, base):
+                """ The full copytracing algorithm which finds all the new files that were
+                added from merge base up to the top commit and for each file it checks if
+                this file was copied from another file.
+                This is pretty slow when a lot of changesets are involved but will track all
+                the copies.
+                """
                 # In certain scenarios (e.g. graft, update or rebase), base can be
                 # overridden We still need to know a real common ancestor in this case We
                 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
                 # can be multiple common ancestors, e.g. in case of bidmerge.  Because our
                 # caller may not know if the revision passed in lieu of the CA is a genuine
                 # common ancestor or not without explicitly checking it, it's better to
                 # determine that here.
                 #
                 # base.descendant(wc) and base.descendant(base) are False, work around that
                 _c1 = c1.p1() if c1.rev() is None else c1
                 _c2 = c2.p1() if c2.rev() is None else c2
                 # an endpoint is "dirty" if it isn't a descendant of the merge base
                 # if we have a dirty endpoint, we need to trigger graft logic, and also
                 # keep track of which endpoint is dirty
                 dirtyc1 = not (base == _c1 or base.descendant(_c1))
                 dirtyc2 = not (base == _c2 or base.descendant(_c2))
                 graft = dirtyc1 or dirtyc2
                 tca = base
                 if graft:
                     tca = _c1.ancestor(_c2)
                 limit = _findlimit(repo, c1.rev(), c2.rev())
                 if limit is None:
                     # no common ancestor, no copies
                     return {}, {}, {}, {}, {}
                 repo.ui.debug("  searching for copies back to rev %d\n" % limit)
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 mb = base.manifest()
                 # gather data from _checkcopies:
                 # - diverge = record all diverges in this dict
                 # - copy = record all non-divergent copies in this dict
                 # - fullcopy = record all copies in this dict
                 # - incomplete = record non-divergent partial copies here
                 # - incompletediverge = record divergent partial copies here
                 diverge = {} # divergence data is shared
                 incompletediverge  = {}
                 data1 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': diverge,
                          'incompletediverge': incompletediverge,
                         }
                 data2 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': diverge,
                          'incompletediverge': incompletediverge,
                         }
                 # find interesting file sets from manifests
                 addedinm1 = m1.filesnotin(mb)
                 addedinm2 = m2.filesnotin(mb)
                 bothnew = sorted(addedinm1 & addedinm2)
                 if tca == base:
                     # unmatched file from base
                     u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
                     u1u, u2u = u1r, u2r
                 else:
                     # unmatched file from base (DAG rotation in the graft case)
                     u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                                   baselabel='base')
                     # unmatched file from topological common ancestors (no DAG rotation)
                     # need to recompute this for directory move handling when grafting
                     mta = tca.manifest()
                     u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
                                                                 m2.filesnotin(mta),
                                                   baselabel='topological common ancestor')
                 for f in u1u:
                     _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
                 for f in u2u:
                     _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
                 copy = dict(data1['copy'])
                 copy.update(data2['copy'])
                 fullcopy = dict(data1['fullcopy'])
                 fullcopy.update(data2['fullcopy'])
                 if dirtyc1:
                     _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                                    incompletediverge)
                 else:
                     _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                                    incompletediverge)
                 renamedelete = {}
                 renamedeleteset = set()
                 divergeset = set()
                 for of, fl in diverge.items():
                     if len(fl) == 1 or of in c1 or of in c2:
                         del diverge[of] # not actually divergent, or not a rename
                         if of not in c1 and of not in c2:
                             # renamed on one side, deleted on the other side, but filter
                             # out files that have been renamed and then deleted
                             renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                             renamedeleteset.update(fl) # reverse map for below
                     else:
                         divergeset.update(fl) # reverse map for below
                 if bothnew:
                     repo.ui.debug("  unmatched files new in both:\n   %s\n"
                                   % "\n   ".join(bothnew))
                 bothdiverge = {}
                 bothincompletediverge = {}
                 remainder = {}
                 both1 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': bothdiverge,
                          'incompletediverge': bothincompletediverge
                         }
                 both2 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': bothdiverge,
                          'incompletediverge': bothincompletediverge
                         }
                 for f in bothnew:
                     _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
                     _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
                 if dirtyc1:
                     # incomplete copies may only be found on the "dirty" side for bothnew
                     assert not both2['incomplete']
                     remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                                bothincompletediverge)
                 elif dirtyc2:
                     assert not both1['incomplete']
                     remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                                bothincompletediverge)
                 else:
                     # incomplete copies and divergences can't happen outside grafts
                     assert not both1['incomplete']
                     assert not both2['incomplete']
                     assert not bothincompletediverge
                 for f in remainder:
                     assert f not in bothdiverge
                     ic = remainder[f]
                     if ic[0] in (m1 if dirtyc1 else m2):
                         # backed-out rename on one side, but watch out for deleted files
                         bothdiverge[f] = ic
                 for of, fl in bothdiverge.items():
                     if len(fl) == 2 and fl[0] == fl[1]:
                         copy[fl[0]] = of # not actually divergent, just matching renames
                 if fullcopy and repo.ui.debugflag:
                     repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                                   "% = renamed and deleted):\n")
                     for f in sorted(fullcopy):
                         note = ""
                         if f in copy:
                             note += "*"
                         if f in divergeset:
                             note += "!"
                         if f in renamedeleteset:
                             note += "%"
                         repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                                           note))
                 del divergeset
                 if not fullcopy:
                     return copy, {}, diverge, renamedelete, {}
                 repo.ui.debug("  checking for directory renames\n")
                 # generate a directory move map
                 d1, d2 = c1.dirs(), c2.dirs()
                 # Hack for adding '', which is not otherwise added, to d1 and d2
                 d1.addpath('/')
                 d2.addpath('/')
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in fullcopy.iteritems():
                     dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif dsrc in d1 and ddst in d1:
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc + "/")
                     elif dsrc in d2 and ddst in d2:
                         # directory wasn't entirely moved remotely
                         invalid.add(dsrc + "/")
                     elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
                         # files from the same directory moved to two different places
                         invalid.add(dsrc + "/")
                     else:
                         # looks good so far
                         dirmove[dsrc + "/"] = ddst + "/"
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del d1, d2, invalid
                 if not dirmove:
                     return copy, {}, diverge, renamedelete, {}
                 for d in dirmove:
                     repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                                   (d, dirmove[d]))
                 movewithdir = {}
                 # check unaccounted nonoverlapping files against directory moves
                 for f in u1r + u2r:
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d):]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(("   pending file src: '%s' -> "
                                                    "dst: '%s'\n") % (f, df))
                                 break
                 return copy, movewithdir, diverge, renamedelete, dirmove
             def _related(f1, f2, limit):
                 """return True if f1 and f2 filectx have a common ancestor
                 Walk back to common ancestor to see if the two files originate
                 from the same file. Since workingfilectx's rev() is None it messes
                 up the integer comparison logic, hence the pre-step check for
                 None (f1 and f2 can only be workingfilectx's initially).
                 """
                 if f1 == f2:
                     return f1 # a match
                 g1, g2 = f1.ancestors(), f2.ancestors()
                 try:
                     f1r, f2r = f1.linkrev(), f2.linkrev()
                     if f1r is None:
                         f1 = next(g1)
                     if f2r is None:
                         f2 = next(g2)
                     while True:
                         f1r, f2r = f1.linkrev(), f2.linkrev()
                         if f1r > f2r:
                             f1 = next(g1)
                         elif f2r > f1r:
                             f2 = next(g2)
                         elif f1 == f2:
                             return f1 # a match
                         elif f1r == f2r or f1r < limit or f2r < limit:
                             return False # copy no longer relevant
                 except StopIteration:
                     return False
             def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
                 """
                 check possible copies of f from msrc to mdst
                 srcctx = starting context for f in msrc
                 dstctx = destination context for f in mdst
                 f = the filename to check (as in msrc)
                 base = the changectx used as a merge base
                 tca = topological common ancestor for graft-like scenarios
                 remotebase = True if base is outside tca::srcctx, False otherwise
                 limit = the rev number to not search beyond
                 data = dictionary of dictionary to store copy data. (see mergecopies)
                 note: limit is only an optimization, and provides no guarantee that
                 irrelevant revisions will not be visited
                 there is no easy way to make this algorithm stop in a guaranteed way
                 once it "goes behind a certain revision".
                 """
                 msrc = srcctx.manifest()
                 mdst = dstctx.manifest()
                 mb = base.manifest()
                 mta = tca.manifest()
                 # Might be true if this call is about finding backward renames,
                 # This happens in the case of grafts because the DAG is then rotated.
                 # If the file exists in both the base and the source, we are not looking
                 # for a rename on the source side, but on the part of the DAG that is
                 # traversed backwards.
                 #
                 # In the case there is both backward and forward renames (before and after
                 # the base) this is more complicated as we must detect a divergence.
                 # We use 'backwards = False' in that case.
                 backwards = not remotebase and base != tca and f in mb
                 getsrcfctx = _makegetfctx(srcctx)
                 getdstfctx = _makegetfctx(dstctx)
                 if msrc[f] == mb.get(f) and not remotebase:
                     # Nothing to merge
                     return
                 of = None
                 seen = {f}
                 for oc in getsrcfctx(f, msrc[f]).ancestors():
                     ocr = oc.linkrev()
                     of = oc.path()
                     if of in seen:
                         # check limit late - grab last rename before
                         if ocr < limit:
                             break
                         continue
                     seen.add(of)
                     # remember for dir rename detection
                     if backwards:
                         data['fullcopy'][of] = f # grafting backwards through renames
                     else:
                         data['fullcopy'][f] = of
                     if of not in mdst:
                         continue # no match, keep looking
                     if mdst[of] == mb.get(of):
                         return # no merge needed, quit early
                     c2 = getdstfctx(of, mdst[of])
                     # c2 might be a plain new file on added on destination side that is
                     # unrelated to the droids we are looking for.
                     cr = _related(oc, c2, tca.rev())
                     if cr and (of == f or of == c2.path()): # non-divergent
                         if backwards:
                             data['copy'][of] = f
                         elif of in mb:
                             data['copy'][f] = of
                         elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                             data['copy'][of] = f
                             del data['fullcopy'][f]
                             data['fullcopy'][of] = f
                         else: # divergence w.r.t. graft CA on one side of topological CA
                             for sf in seen:
                                 if sf in mb:
                                     assert sf not in data['diverge']
                                     data['diverge'][sf] = [f, of]
                                     break
                         return
                 if of in mta:
                     if backwards or remotebase:
                         data['incomplete'][of] = f
                     else:
                         for sf in seen:
                             if sf in mb:
                                 if tca == base:
                                     data['diverge'].setdefault(sf, []).append(f)
                                 else:
                                     data['incompletediverge'][sf] = [of, f]
                                 return
             def duplicatecopies(repo, rev, fromrev, skiprev=None):
                 '''reproduce copies from fromrev to rev in the dirstate
                 If skiprev is specified, it's a revision that should be used to
                 filter copy records. Any copies that occur between fromrev and
                 skiprev will not be duplicated, even if they appear in the set of
                 copies between fromrev and rev.
                 '''
                 exclude = {}
                 if (skiprev is not None and
                     repo.ui.config('experimental', 'copytrace') != 'off'):
                     # copytrace='off' skips this line, but not the entire function because
                     # the line below is O(size of the repo) during a rebase, while the rest
                     # of the function is much faster (and is required for carrying copy
                     # metadata across the rebase anyway).
                     exclude = pathcopies(repo[fromrev], repo[skiprev])
                 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
                     # copies.pathcopies returns backward renames, so dst might not
                     # actually be in the dirstate
                     if dst in exclude:
                         continue
                     if repo.dirstate[dst] in "nma":
                         repo.dirstate.copy(src, dst)

+            Test for the full copytracing algorithm
+            =======================================
               $ hg init t
               $ cd t
               $ echo 1 > a
               $ hg ci -qAm "first"
               $ hg cp a b
               $ hg mv a c
               $ echo 2 >> b
               $ echo 2 >> c
               $ hg ci -qAm "second"
               $ hg co -C 0
 files updated, 0 files merged, 2 files removed, 0 files unresolved
               $ echo 0 > a
               $ echo 1 >> a
               $ hg ci -qAm "other"
               $ hg merge --debug
                 searching for copies back to rev 1
                 unmatched files in other:
                  b
                  c
                 all copies found (* = to merge, ! = divergent, % = renamed and deleted):
                  src: 'a' -> dst: 'b' *
                  src: 'a' -> dst: 'c' *
                 checking for directory renames
               resolving manifests
                branchmerge: True, force: False, partial: False
                ancestor: b8bf91eeebbc, local: add3f11052fa+, remote: 17c05bb7fcb6
                preserving a for resolve of b
                preserving a for resolve of c
               removing a
               starting 4 threads for background file closing (?)
                b: remote moved from a -> m (premerge)
               picked tool ':merge' for b (binary False symlink False changedelete False)
               merging a and b to b
               my b@add3f11052fa+ other b@17c05bb7fcb6 ancestor a@b8bf91eeebbc
                premerge successful
                c: remote moved from a -> m (premerge)
               picked tool ':merge' for c (binary False symlink False changedelete False)
               merging a and c to c
               my c@add3f11052fa+ other c@17c05bb7fcb6 ancestor a@b8bf91eeebbc
                premerge successful
 files updated, 2 files merged, 0 files removed, 0 files unresolved
               (branch merge, don't forget to commit)
             file b
               $ cat b
             file c
               $ cat c
             Test disabling copy tracing
             - first verify copy metadata was kept
               $ hg up -qC 2
               $ hg rebase --keep -d 1 -b 2 --config extensions.rebase=
               rebasing 2:add3f11052fa "other" (tip)
               merging b and a to b
               merging c and a to c
               $ cat b
             - next verify copy metadata is lost when disabled
               $ hg strip -r . --config extensions.strip=
 files updated, 0 files merged, 0 files removed, 0 files unresolved
               saved backup bundle to $TESTTMP/t/.hg/strip-backup/550bd84c0cd3-fc575957-backup.hg (glob)
               $ hg up -qC 2
               $ hg rebase --keep -d 1 -b 2 --config extensions.rebase= --config experimental.copytrace=off --config ui.interactive=True << EOF
               > c
               > EOF
               rebasing 2:add3f11052fa "other" (tip)
               other [source] changed a which local [dest] deleted
               use (c)hanged version, leave (d)eleted, or leave (u)nresolved? c
               $ cat b
               $ cd ..
             Verify disabling copy tracing still keeps copies from rebase source
               $ hg init copydisable
               $ cd copydisable
               $ touch a
               $ hg ci -Aqm 'add a'
               $ touch b
               $ hg ci -Aqm 'add b, c'
               $ hg cp b x
               $ echo x >> x
               $ hg ci -qm 'copy b->x'
               $ hg up -q 1
               $ touch z
               $ hg ci -Aqm 'add z'
               $ hg log -G -T '{rev} {desc}\n'
               @  3 add z
               |
               | o  2 copy b->x
               |/
               o  1 add b, c
               |
               o  0 add a
               $ hg rebase -d . -b 2 --config extensions.rebase= --config experimental.copytrace=off
               rebasing 2:6adcf8c12e7d "copy b->x"
               saved backup bundle to $TESTTMP/copydisable/.hg/strip-backup/6adcf8c12e7d-ce4b3e75-rebase.hg (glob)
               $ hg up -q 3
               $ hg log -f x -T '{rev} {desc}\n'
 copy b->x
 add b, c
               $ cd ../
             Verify we duplicate existing copies, instead of detecting them
               $ hg init copydisable3
               $ cd copydisable3
               $ touch a
               $ hg ci -Aqm 'add a'
               $ hg cp a b
               $ hg ci -Aqm 'copy a->b'
               $ hg mv b c
               $ hg ci -Aqm 'move b->c'
               $ hg up -q 0
               $ hg cp a b
               $ echo b >> b
               $ hg ci -Aqm 'copy a->b (2)'
               $ hg log -G -T '{rev} {desc}\n'
               @  3 copy a->b (2)
               |
               | o  2 move b->c
               | |
               | o  1 copy a->b
               |/
               o  0 add a
               $ hg rebase -d 2 -s 3 --config extensions.rebase= --config experimental.copytrace=off
               rebasing 3:47e1a9e6273b "copy a->b (2)" (tip)
               saved backup bundle to $TESTTMP/copydisable3/.hg/strip-backup/47e1a9e6273b-2d099c59-rebase.hg (glob)
               $ hg log -G -f b
               @  changeset:   3:76024fb4b05b
               :  tag:         tip
               :  user:        test
               :  date:        Thu Jan 01 00:00:00 1970 +0000
               :  summary:     copy a->b (2)
               :
               o  changeset:   0:ac82d8b1f7c4
                  user:        test
                  date:        Thu Jan 01 00:00:00 1970 +0000
                  summary:     add a