upstream/mercurial-mirror Commit - r37410:a4f02a17

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import collections

10

import collections

11

import heapq

11

import heapq

12

import os

12

import os

13

14

from .i18n import _

14

from .i18n import _

15

16

from . import (

16

from . import (

17

match as matchmod,

17

match as matchmod,

18

node,

18

node,

19

pathutil,

19

pathutil,

20

scmutil,

20

scmutil,

21

util,

21

util,

22

)

22

)

23

24

def _findlimit(repo, a, b):

24

def _findlimit(repo, a, b):

25

"""

25

"""

26

Find the last revision that needs to be checked to ensure that a full

26

Find the last revision that needs to be checked to ensure that a full

27

transitive closure for file copies can be properly calculated.

27

transitive closure for file copies can be properly calculated.

28

Generally, this means finding the earliest revision number that's an

28

Generally, this means finding the earliest revision number that's an

29

ancestor of a or b but not both, except when a or b is a direct descendent

29

ancestor of a or b but not both, except when a or b is a direct descendent

30

of the other, in which case we can return the minimum revnum of a and b.

30

of the other, in which case we can return the minimum revnum of a and b.

31

None if no such revision exists.

31

None if no such revision exists.

32

"""

32

"""

33

34

# basic idea:

34

# basic idea:

35

# - mark a and b with different sides

35

# - mark a and b with different sides

36

# - if a parent's children are all on the same side, the parent is

36

# - if a parent's children are all on the same side, the parent is

37

# on that side, otherwise it is on no side

37

# on that side, otherwise it is on no side

38

# - walk the graph in topological order with the help of a heap;

38

# - walk the graph in topological order with the help of a heap;

39

# - add unseen parents to side map

39

# - add unseen parents to side map

40

# - clear side of any parent that has children on different sides

40

# - clear side of any parent that has children on different sides

41

# - track number of interesting revs that might still be on a side

41

# - track number of interesting revs that might still be on a side

42

# - track the lowest interesting rev seen

42

# - track the lowest interesting rev seen

43

# - quit when interesting revs is zero

43

# - quit when interesting revs is zero

44

45

cl = repo.changelog

45

cl = repo.changelog

46

working = len(cl) # pseudo rev for the working directory

46

working = len(cl) # pseudo rev for the working directory

47

if a is None:

47

if a is None:

48

a = working

48

a = working

49

if b is None:

49

if b is None:

50

b = working

50

b = working

51

52

side = {a: -1, b: 1}

52

side = {a: -1, b: 1}

53

visit = [-a, -b]

53

visit = [-a, -b]

54

heapq.heapify(visit)

54

heapq.heapify(visit)

55

interesting = len(visit)

55

interesting = len(visit)

56

hascommonancestor = False

56

hascommonancestor = False

57

limit = working

57

limit = working

58

59

while interesting:

59

while interesting:

60

r = -heapq.heappop(visit)

60

r = -heapq.heappop(visit)

61

if r == working:

61

if r == working:

62

parents = [cl.rev(p) for p in repo.dirstate.parents()]

62

parents = [cl.rev(p) for p in repo.dirstate.parents()]

63

else:

63

else:

64

parents = cl.parentrevs(r)

64

parents = cl.parentrevs(r)

65

for p in parents:

65

for p in parents:

66

if p < 0:

66

if p < 0:

67

continue

67

continue

68

if p not in side:

68

if p not in side:

69

# first time we see p; add it to visit

69

# first time we see p; add it to visit

70

side[p] = side[r]

70

side[p] = side[r]

71

if side[p]:

71

if side[p]:

72

interesting += 1

72

interesting += 1

73

heapq.heappush(visit, -p)

73

heapq.heappush(visit, -p)

74

elif side[p] and side[p] != side[r]:

74

elif side[p] and side[p] != side[r]:

75

# p was interesting but now we know better

75

# p was interesting but now we know better

76

side[p] = 0

76

side[p] = 0

77

interesting -= 1

77

interesting -= 1

78

hascommonancestor = True

78

hascommonancestor = True

79

if side[r]:

79

if side[r]:

80

limit = r # lowest rev visited

80

limit = r # lowest rev visited

81

interesting -= 1

81

interesting -= 1

82

83

if not hascommonancestor:

83

if not hascommonancestor:

84

return None

84

return None

85

86

# Consider the following flow (see test-commit-amend.t under issue4405):

86

# Consider the following flow (see test-commit-amend.t under issue4405):

87

# 1/ File 'a0' committed

87

# 1/ File 'a0' committed

88

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

88

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

89

# 3/ Move back to first commit

89

# 3/ Move back to first commit

90

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

90

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

91

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

91

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

92

#

92

#

93

# During the amend in step five, we will be in this state:

93

# During the amend in step five, we will be in this state:

94

#

94

#

95

# @ 3 temporary amend commit for a1-amend

95

# @ 3 temporary amend commit for a1-amend

96

# |

96

# |

97

# o 2 a1-amend

97

# o 2 a1-amend

98

# |

98

# |

99

# | o 1 a1

99

# | o 1 a1

100

# |/

100

# |/

101

# o 0 a0

101

# o 0 a0

102

#

102

#

103

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

103

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

104

# yet the filelog has the copy information in rev 1 and we will not look

104

# yet the filelog has the copy information in rev 1 and we will not look

105

# back far enough unless we also look at the a and b as candidates.

105

# back far enough unless we also look at the a and b as candidates.

106

# This only occurs when a is a descendent of b or visa-versa.

106

# This only occurs when a is a descendent of b or visa-versa.

107

return min(limit, a, b)

107

return min(limit, a, b)

108

109

def _chain(src, dst, a, b):

109

def _chain(src, dst, a, b):

110

"""chain two sets of copies a->b"""

110

"""chain two sets of copies a->b"""

111

t = a.copy()

111

t = a.copy()

112

for k, v in b.iteritems():

112

for k, v in b.iteritems():

113

if v in t:

113

if v in t:

114

# found a chain

114

# found a chain

115

if t[v] != k:

115

if t[v] != k:

116

# file wasn't renamed back to itself

116

# file wasn't renamed back to itself

117

t[k] = t[v]

117

t[k] = t[v]

118

if v not in dst:

118

if v not in dst:

119

# chain was a rename, not a copy

119

# chain was a rename, not a copy

120

del t[v]

120

del t[v]

121

if v in src:

121

if v in src:

122

# file is a copy of an existing file

122

# file is a copy of an existing file

123

t[k] = v

123

t[k] = v

124

125

# remove criss-crossed copies

125

# remove criss-crossed copies

126

for k, v in list(t.items()):

126

for k, v in list(t.items()):

127

if k in src and v in dst:

127

if k in src and v in dst:

128

del t[k]

128

del t[k]

129

130

return t

130

return t

131

132

def _tracefile(fctx, am, limit=-1):

132

def _tracefile(fctx, am, limit=-1):

133

"""return file context that is the ancestor of fctx present in ancestor

133

"""return file context that is the ancestor of fctx present in ancestor

134

manifest am, stopping after the first ancestor lower than limit"""

134

manifest am, stopping after the first ancestor lower than limit"""

135

136

for f in fctx.ancestors():

136

for f in fctx.ancestors():

137

if am.get(f.path(), None) == f.filenode():

137

if am.get(f.path(), None) == f.filenode():

138

return f

138

return f

139

if limit >= 0 and f.linkrev() < limit and f.rev() < limit:

139

if limit >= 0 and f.linkrev() < limit and f.rev() < limit:

140

return None

140

return None

141

142

def _dirstatecopies(d, match=None):

142

def _dirstatecopies(d, match=None):

143

ds = d._repo.dirstate

143

ds = d._repo.dirstate

144

c = ds.copies().copy()

144

c = ds.copies().copy()

145

for k in list(c):

145

for k in list(c):

146

if ds[k] not in 'anm' or (match and not match(k)):

146

if ds[k] not in 'anm' or (match and not match(k)):

147

del c[k]

147

del c[k]

148

return c

148

return c

149

150

def _computeforwardmissing(a, b, match=None):

150

def _computeforwardmissing(a, b, match=None):

151

"""Computes which files are in b but not a.

151

"""Computes which files are in b but not a.

152

This is its own function so extensions can easily wrap this call to see what

152

This is its own function so extensions can easily wrap this call to see what

153

files _forwardcopies is about to process.

153

files _forwardcopies is about to process.

154

"""

154

"""

155

ma = a.manifest()

155

ma = a.manifest()

156

mb = b.manifest()

156

mb = b.manifest()

157

return mb.filesnotin(ma, match=match)

157

return mb.filesnotin(ma, match=match)

158

159

def _committedforwardcopies(a, b, match):

159

def _committedforwardcopies(a, b, match):

160

"""Like _forwardcopies(), but b.rev() cannot be None (working copy)"""

160

"""Like _forwardcopies(), but b.rev() cannot be None (working copy)"""

161

# files might have to be traced back to the fctx parent of the last

161

# files might have to be traced back to the fctx parent of the last

162

# one-side-only changeset, but not further back than that

162

# one-side-only changeset, but not further back than that

163

limit = _findlimit(a._repo, a.rev(), b.rev())

163

limit = _findlimit(a._repo, a.rev(), b.rev())

164

if limit is None:

164

if limit is None:

165

limit = -1

165

limit = -1

166

am = a.manifest()

166

am = a.manifest()

167

168

# find where new files came from

168

# find where new files came from

169

# we currently don't try to find where old files went, too expensive

169

# we currently don't try to find where old files went, too expensive

170

# this means we can miss a case like 'hg rm b; hg cp a b'

170

# this means we can miss a case like 'hg rm b; hg cp a b'

171

cm = {}

171

cm = {}

172

173

# Computing the forward missing is quite expensive on large manifests, since

173

# Computing the forward missing is quite expensive on large manifests, since

174

# it compares the entire manifests. We can optimize it in the common use

174

# it compares the entire manifests. We can optimize it in the common use

175

# case of computing what copies are in a commit versus its parent (like

175

# case of computing what copies are in a commit versus its parent (like

176

# during a rebase or histedit). Note, we exclude merge commits from this

176

# during a rebase or histedit). Note, we exclude merge commits from this

177

# optimization, since the ctx.files() for a merge commit is not correct for

177

# optimization, since the ctx.files() for a merge commit is not correct for

178

# this comparison.

178

# this comparison.

179

forwardmissingmatch = match

179

forwardmissingmatch = match

180

if b.p1() == a and b.p2().node() == node.nullid:

180

if b.p1() == a and b.p2().node() == node.nullid:

181

filesmatcher = scmutil.matchfiles(a._repo, b.files())

181

filesmatcher = scmutil.matchfiles(a._repo, b.files())

182

forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)

182

forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)

183

missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

183

missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

184

185

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

185

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

186

for f in missing:

186

for f in missing:

187

fctx = b[f]

187

fctx = b[f]

188

fctx._ancestrycontext = ancestrycontext

188

fctx._ancestrycontext = ancestrycontext

189

ofctx = _tracefile(fctx, am, limit)

189

ofctx = _tracefile(fctx, am, limit)

190

if ofctx:

190

if ofctx:

191

cm[f] = ofctx.path()

191

cm[f] = ofctx.path()

192

return cm

192

return cm

193

194

def _forwardcopies(a, b, match=None):

194

def _forwardcopies(a, b, match=None):

195

"""find {dst@b: src@a} copy mapping where a is an ancestor of b"""

195

"""find {dst@b: src@a} copy mapping where a is an ancestor of b"""

196

197

# check for working copy

197

# check for working copy

198

if b.rev() is None:

198

if b.rev() is None:

199

if a == b.p1():

199

if a == b.p1():

200

# short-circuit to avoid issues with merge states

200

# short-circuit to avoid issues with merge states

201

return _dirstatecopies(b, match)

201

return _dirstatecopies(b, match)

202

203

cm = _committedforwardcopies(a, b.p1(), match)

203

cm = _committedforwardcopies(a, b.p1(), match)

204

# combine copies from dirstate if necessary

204

# combine copies from dirstate if necessary

205

return _chain(a, b, cm, _dirstatecopies(b, match))

205

return _chain(a, b, cm, _dirstatecopies(b, match))

206

return _committedforwardcopies(a, b, match)

206

return _committedforwardcopies(a, b, match)

207

208

def _backwardrenames(a, b):

208

def _backwardrenames(a, b):

209

if a._repo.ui.config('experimental', 'copytrace') == 'off':

209

if a._repo.ui.config('experimental', 'copytrace') == 'off':

210

return {}

210

return {}

211

212

# Even though we're not taking copies into account, 1:n rename situations

212

# Even though we're not taking copies into account, 1:n rename situations

213

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

213

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

214

# arbitrarily pick one of the renames.

214

# arbitrarily pick one of the renames.

215

f = _forwardcopies(b, a)

215

f = _forwardcopies(b, a)

216

r = {}

216

r = {}

217

for k, v in sorted(f.iteritems()):

217

for k, v in sorted(f.iteritems()):

218

# remove copies

218

# remove copies

219

if v in a:

219

if v in a:

220

continue

220

continue

221

r[v] = k

221

r[v] = k

222

return r

222

return r

223

224

def pathcopies(x, y, match=None):

224

def pathcopies(x, y, match=None):

225

"""find {dst@y: src@x} copy mapping for directed compare"""

225

"""find {dst@y: src@x} copy mapping for directed compare"""

226

if x == y or not x or not y:

226

if x == y or not x or not y:

227

return {}

227

return {}

228

a = y.ancestor(x)

228

a = y.ancestor(x)

229

if a == x:

229

if a == x:

230

return _forwardcopies(x, y, match=match)

230

return _forwardcopies(x, y, match=match)

231

if a == y:

231

if a == y:

232

return _backwardrenames(x, y)

232

return _backwardrenames(x, y)

233

return _chain(x, y, _backwardrenames(x, a),

233

return _chain(x, y, _backwardrenames(x, a),

234

_forwardcopies(a, y, match=match))

234

_forwardcopies(a, y, match=match))

235

236

def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):

236

def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):

237

"""Computes, based on addedinm1 and addedinm2, the files exclusive to c1

237

"""Computes, based on addedinm1 and addedinm2, the files exclusive to c1

238

and c2. This is its own function so extensions can easily wrap this call

238

and c2. This is its own function so extensions can easily wrap this call

239

to see what files mergecopies is about to process.

239

to see what files mergecopies is about to process.

240

241

Even though c1 and c2 are not used in this function, they are useful in

241

Even though c1 and c2 are not used in this function, they are useful in

242

other extensions for being able to read the file nodes of the changed files.

242

other extensions for being able to read the file nodes of the changed files.

243

244

"baselabel" can be passed to help distinguish the multiple computations

244

"baselabel" can be passed to help distinguish the multiple computations

245

done in the graft case.

245

done in the graft case.

246

"""

246

"""

247

u1 = sorted(addedinm1 - addedinm2)

247

u1 = sorted(addedinm1 - addedinm2)

248

u2 = sorted(addedinm2 - addedinm1)

248

u2 = sorted(addedinm2 - addedinm1)

249

250

header = " unmatched files in %s"

250

header = " unmatched files in %s"

251

if baselabel:

251

if baselabel:

252

header += ' (from %s)' % baselabel

252

header += ' (from %s)' % baselabel

253

if u1:

253

if u1:

254

repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))

254

repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))

255

if u2:

255

if u2:

256

repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))

256

repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))

257

return u1, u2

257

return u1, u2

258

259

def _makegetfctx(ctx):

259

def _makegetfctx(ctx):

260

"""return a 'getfctx' function suitable for _checkcopies usage

260

"""return a 'getfctx' function suitable for _checkcopies usage

261

262

We have to re-setup the function building 'filectx' for each

262

We have to re-setup the function building 'filectx' for each

263

'_checkcopies' to ensure the linkrev adjustment is properly setup for

263

'_checkcopies' to ensure the linkrev adjustment is properly setup for

264

each. Linkrev adjustment is important to avoid bug in rename

264

each. Linkrev adjustment is important to avoid bug in rename

265

detection. Moreover, having a proper '_ancestrycontext' setup ensures

265

detection. Moreover, having a proper '_ancestrycontext' setup ensures

266

the performance impact of this adjustment is kept limited. Without it,

266

the performance impact of this adjustment is kept limited. Without it,

267

each file could do a full dag traversal making the time complexity of

267

each file could do a full dag traversal making the time complexity of

268

the operation explode (see issue4537).

268

the operation explode (see issue4537).

269

270

This function exists here mostly to limit the impact on stable. Feel

270

This function exists here mostly to limit the impact on stable. Feel

271

free to refactor on default.

271

free to refactor on default.

272

"""

272

"""

273

rev = ctx.rev()

273

rev = ctx.rev()

274

repo = ctx._repo

274

repo = ctx._repo

275

ac = getattr(ctx, '_ancestrycontext', None)

275

ac = getattr(ctx, '_ancestrycontext', None)

276

if ac is None:

276

if ac is None:

277

revs = [rev]

277

revs = [rev]

278

if rev is None:

278

if rev is None:

279

revs = [p.rev() for p in ctx.parents()]

279

revs = [p.rev() for p in ctx.parents()]

280

ac = repo.changelog.ancestors(revs, inclusive=True)

280

ac = repo.changelog.ancestors(revs, inclusive=True)

281

ctx._ancestrycontext = ac

281

ctx._ancestrycontext = ac

282

def makectx(f, n):

282

def makectx(f, n):

283

if n in node.wdirnodes: # in a working context?

283

if n in node.wdirnodes: # in a working context?

284

if ctx.rev() is None:

284

if ctx.rev() is None:

285

return ctx.filectx(f)

285

return ctx.filectx(f)

286

return repo[None][f]

286

return repo[None][f]

287

fctx = repo.filectx(f, fileid=n)

287

fctx = repo.filectx(f, fileid=n)

288

# setup only needed for filectx not create from a changectx

288

# setup only needed for filectx not create from a changectx

289

fctx._ancestrycontext = ac

289

fctx._ancestrycontext = ac

290

fctx._descendantrev = rev

290

fctx._descendantrev = rev

291

return fctx

291

return fctx

292

return util.lrucachefunc(makectx)

292

return util.lrucachefunc(makectx)

293

294

def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):

294

def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):

295

"""combine partial copy paths"""

295

"""combine partial copy paths"""

296

remainder = {}

296

remainder = {}

297

for f in copyfrom:

297

for f in copyfrom:

298

if f in copyto:

298

if f in copyto:

299

finalcopy[copyto[f]] = copyfrom[f]

299

finalcopy[copyto[f]] = copyfrom[f]

300

del copyto[f]

300

del copyto[f]

301

for f in incompletediverge:

301

for f in incompletediverge:

302

assert f not in diverge

302

assert f not in diverge

303

ic = incompletediverge[f]

303

ic = incompletediverge[f]

304

if ic[0] in copyto:

304

if ic[0] in copyto:

305

diverge[f] = [copyto[ic[0]], ic[1]]

305

diverge[f] = [copyto[ic[0]], ic[1]]

306

else:

306

else:

307

remainder[f] = ic

307

remainder[f] = ic

308

return remainder

308

return remainder

309

310

def mergecopies(repo, c1, c2, base):

310

def mergecopies(repo, c1, c2, base):

311

"""

311

"""

312

The function calling different copytracing algorithms on the basis of config

312

The function calling different copytracing algorithms on the basis of config

313

which find moves and copies between context c1 and c2 that are relevant for

313

which find moves and copies between context c1 and c2 that are relevant for

314

merging. 'base' will be used as the merge base.

314

merging. 'base' will be used as the merge base.

315

316

Copytracing is used in commands like rebase, merge, unshelve, etc to merge

316

Copytracing is used in commands like rebase, merge, unshelve, etc to merge

317

files that were moved/ copied in one merge parent and modified in another.

317

files that were moved/ copied in one merge parent and modified in another.

318

For example:

318

For example:

319

320

o ---> 4 another commit

320

o ---> 4 another commit

321

|

321

|

322

| o ---> 3 commit that modifies a.txt

322

| o ---> 3 commit that modifies a.txt

323

| /

323

| /

324

o / ---> 2 commit that moves a.txt to b.txt

324

o / ---> 2 commit that moves a.txt to b.txt

325

|/

325

|/

326

o ---> 1 merge base

326

o ---> 1 merge base

327

328

If we try to rebase revision 3 on revision 4, since there is no a.txt in

328

If we try to rebase revision 3 on revision 4, since there is no a.txt in

329

revision 4, and if user have copytrace disabled, we prints the following

329

revision 4, and if user have copytrace disabled, we prints the following

330

message:

330

message:

331

332

```other changed <file> which local deleted```

332

```other changed <file> which local deleted```

333

334

Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and

334

Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and

335

"dirmove".

335

"dirmove".

336

337

"copy" is a mapping from destination name -> source name,

337

"copy" is a mapping from destination name -> source name,

338

where source is in c1 and destination is in c2 or vice-versa.

338

where source is in c1 and destination is in c2 or vice-versa.

339

340

"movewithdir" is a mapping from source name -> destination name,

340

"movewithdir" is a mapping from source name -> destination name,

341

where the file at source present in one context but not the other

341

where the file at source present in one context but not the other

342

needs to be moved to destination by the merge process, because the

342

needs to be moved to destination by the merge process, because the

343

other context moved the directory it is in.

343

other context moved the directory it is in.

344

345

"diverge" is a mapping of source name -> list of destination names

345

"diverge" is a mapping of source name -> list of destination names

346

for divergent renames.

346

for divergent renames.

347

348

"renamedelete" is a mapping of source name -> list of destination

348

"renamedelete" is a mapping of source name -> list of destination

349

names for files deleted in c1 that were renamed in c2 or vice-versa.

349

names for files deleted in c1 that were renamed in c2 or vice-versa.

350

351

"dirmove" is a mapping of detected source dir -> destination dir renames.

351

"dirmove" is a mapping of detected source dir -> destination dir renames.

352

This is needed for handling changes to new files previously grafted into

352

This is needed for handling changes to new files previously grafted into

353

renamed directories.

353

renamed directories.

354

"""

354

"""

355

# avoid silly behavior for update from empty dir

355

# avoid silly behavior for update from empty dir

356

if not c1 or not c2 or c1 == c2:

356

if not c1 or not c2 or c1 == c2:

357

return {}, {}, {}, {}, {}

357

return {}, {}, {}, {}, {}

358

359

# avoid silly behavior for parent -> working dir

359

# avoid silly behavior for parent -> working dir

360

if c2.node() is None and c1.node() == repo.dirstate.p1():

360

if c2.node() is None and c1.node() == repo.dirstate.p1():

361

return repo.dirstate.copies(), {}, {}, {}, {}

361

return repo.dirstate.copies(), {}, {}, {}, {}

362

363

copytracing = repo.ui.config('experimental', 'copytrace')

363

copytracing = repo.ui.config('experimental', 'copytrace')

364

365

# Copy trace disabling is explicitly below the node == p1 logic above

365

# Copy trace disabling is explicitly below the node == p1 logic above

366

# because the logic above is required for a simple copy to be kept across a

366

# because the logic above is required for a simple copy to be kept across a

367

# rebase.

367

# rebase.

368

if copytracing == 'off':

368

if copytracing == 'off':

369

return {}, {}, {}, {}, {}

369

return {}, {}, {}, {}, {}

370

elif copytracing == 'heuristics':

370

elif copytracing == 'heuristics':

371

# Do full copytracing if only non-public revisions are involved as

371

# Do full copytracing if only non-public revisions are involved as

372

# that will be fast enough and will also cover the copies which could

372

# that will be fast enough and will also cover the copies which could

373

# be missed by heuristics

373

# be missed by heuristics

374

if _isfullcopytraceable(repo, c1, base):

374

if _isfullcopytraceable(repo, c1, base):

375

return _fullcopytracing(repo, c1, c2, base)

375

return _fullcopytracing(repo, c1, c2, base)

376

return _heuristicscopytracing(repo, c1, c2, base)

376

return _heuristicscopytracing(repo, c1, c2, base)

377

else:

377

else:

378

return _fullcopytracing(repo, c1, c2, base)

378

return _fullcopytracing(repo, c1, c2, base)

379

380

def _isfullcopytraceable(repo, c1, base):

380

def _isfullcopytraceable(repo, c1, base):

381

""" Checks that if base, source and destination are all no-public branches,

381

""" Checks that if base, source and destination are all no-public branches,

382

if yes let's use the full copytrace algorithm for increased capabilities

382

if yes let's use the full copytrace algorithm for increased capabilities

383

since it will be fast enough.

383

since it will be fast enough.

384

385

`experimental.copytrace.sourcecommitlimit` can be used to set a limit for

385

`experimental.copytrace.sourcecommitlimit` can be used to set a limit for

386

number of changesets from c1 to base such that if number of changesets are

386

number of changesets from c1 to base such that if number of changesets are

387

more than the limit, full copytracing algorithm won't be used.

387

more than the limit, full copytracing algorithm won't be used.

388

"""

388

"""

389

if c1.rev() is None:

389

if c1.rev() is None:

390

c1 = c1.p1()

390

c1 = c1.p1()

391

if c1.mutable() and base.mutable():

391

if c1.mutable() and base.mutable():

392

sourcecommitlimit = repo.ui.configint('experimental',

392

sourcecommitlimit = repo.ui.configint('experimental',

393

'copytrace.sourcecommitlimit')

393

'copytrace.sourcecommitlimit')

394

commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))

394

commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))

395

return commits < sourcecommitlimit

395

return commits < sourcecommitlimit

396

return False

396

return False

397

398

def _fullcopytracing(repo, c1, c2, base):

398

def _fullcopytracing(repo, c1, c2, base):

399

""" The full copytracing algorithm which finds all the new files that were

399

""" The full copytracing algorithm which finds all the new files that were

400

added from merge base up to the top commit and for each file it checks if

400

added from merge base up to the top commit and for each file it checks if

401

this file was copied from another file.

401

this file was copied from another file.

402

403

This is pretty slow when a lot of changesets are involved but will track all

403

This is pretty slow when a lot of changesets are involved but will track all

404

the copies.

404

the copies.

405

"""

405

"""

406

# In certain scenarios (e.g. graft, update or rebase), base can be

406

# In certain scenarios (e.g. graft, update or rebase), base can be

407

# overridden We still need to know a real common ancestor in this case We

407

# overridden We still need to know a real common ancestor in this case We

408

# can't just compute _c1.ancestor(_c2) and compare it to ca, because there

408

# can't just compute _c1.ancestor(_c2) and compare it to ca, because there

409

# can be multiple common ancestors, e.g. in case of bidmerge. Because our

409

# can be multiple common ancestors, e.g. in case of bidmerge. Because our

410

# caller may not know if the revision passed in lieu of the CA is a genuine

410

# caller may not know if the revision passed in lieu of the CA is a genuine

411

# common ancestor or not without explicitly checking it, it's better to

411

# common ancestor or not without explicitly checking it, it's better to

412

# determine that here.

412

# determine that here.

413

#

413

#

414

# base.descendant(wc) and base.descendant(base) are False, work around that

414

# base.descendant(wc) and base.descendant(base) are False, work around that

415

_c1 = c1.p1() if c1.rev() is None else c1

415

_c1 = c1.p1() if c1.rev() is None else c1

416

_c2 = c2.p1() if c2.rev() is None else c2

416

_c2 = c2.p1() if c2.rev() is None else c2

417

# an endpoint is "dirty" if it isn't a descendant of the merge base

417

# an endpoint is "dirty" if it isn't a descendant of the merge base

418

# if we have a dirty endpoint, we need to trigger graft logic, and also

418

# if we have a dirty endpoint, we need to trigger graft logic, and also

419

# keep track of which endpoint is dirty

419

# keep track of which endpoint is dirty

420

dirtyc1 = not (base == _c1 or base.descendant(_c1))

420

dirtyc1 = not (base == _c1 or base.descendant(_c1))

421

dirtyc2 = not (base == _c2 or base.descendant(_c2))

421

dirtyc2 = not (base == _c2 or base.descendant(_c2))

422

graft = dirtyc1 or dirtyc2

422

graft = dirtyc1 or dirtyc2

423

tca = base

423

tca = base

424

if graft:

424

if graft:

425

tca = _c1.ancestor(_c2)

425

tca = _c1.ancestor(_c2)

426

427

limit = _findlimit(repo, c1.rev(), c2.rev())

427

limit = _findlimit(repo, c1.rev(), c2.rev())

428

if limit is None:

428

if limit is None:

429

# no common ancestor, no copies

429

# no common ancestor, no copies

430

return {}, {}, {}, {}, {}

430

return {}, {}, {}, {}, {}

431

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

431

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

432

433

m1 = c1.manifest()

433

m1 = c1.manifest()

434

m2 = c2.manifest()

434

m2 = c2.manifest()

435

mb = base.manifest()

435

mb = base.manifest()

436

437

# gather data from _checkcopies:

437

# gather data from _checkcopies:

438

# - diverge = record all diverges in this dict

438

# - diverge = record all diverges in this dict

439

# - copy = record all non-divergent copies in this dict

439

# - copy = record all non-divergent copies in this dict

440

# - fullcopy = record all copies in this dict

440

# - fullcopy = record all copies in this dict

441

# - incomplete = record non-divergent partial copies here

441

# - incomplete = record non-divergent partial copies here

442

# - incompletediverge = record divergent partial copies here

442

# - incompletediverge = record divergent partial copies here

443

diverge = {} # divergence data is shared

443

diverge = {} # divergence data is shared

444

incompletediverge = {}

444

incompletediverge = {}

445

data1 = {'copy': {},

445

data1 = {'copy': {},

446

'fullcopy': {},

446

'fullcopy': {},

447

'incomplete': {},

447

'incomplete': {},

448

'diverge': diverge,

448

'diverge': diverge,

449

'incompletediverge': incompletediverge,

449

'incompletediverge': incompletediverge,

450

}

450

}

451

data2 = {'copy': {},

451

data2 = {'copy': {},

452

'fullcopy': {},

452

'fullcopy': {},

453

'incomplete': {},

453

'incomplete': {},

454

'diverge': diverge,

454

'diverge': diverge,

455

'incompletediverge': incompletediverge,

455

'incompletediverge': incompletediverge,

456

}

456

}

457

458

# find interesting file sets from manifests

458

# find interesting file sets from manifests

459

addedinm1 = m1.filesnotin(mb)

459

addedinm1 = m1.filesnotin(mb)

460

addedinm2 = m2.filesnotin(mb)

460

addedinm2 = m2.filesnotin(mb)

461

bothnew = sorted(addedinm1 & addedinm2)

461

bothnew = sorted(addedinm1 & addedinm2)

462

if tca == base:

462

if tca == base:

463

# unmatched file from base

463

# unmatched file from base

464

u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)

464

u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)

465

u1u, u2u = u1r, u2r

465

u1u, u2u = u1r, u2r

466

else:

466

else:

467

# unmatched file from base (DAG rotation in the graft case)

467

# unmatched file from base (DAG rotation in the graft case)

468

u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,

468

u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,

469

baselabel='base')

469

baselabel='base')

470

# unmatched file from topological common ancestors (no DAG rotation)

470

# unmatched file from topological common ancestors (no DAG rotation)

471

# need to recompute this for directory move handling when grafting

471

# need to recompute this for directory move handling when grafting

472

mta = tca.manifest()

472

mta = tca.manifest()

473

u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),

473

u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),

474

m2.filesnotin(mta),

474

m2.filesnotin(mta),

475

baselabel='topological common ancestor')

475

baselabel='topological common ancestor')

476

477

for f in u1u:

477

for f in u1u:

478

_checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

478

_checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

479

480

for f in u2u:

480

for f in u2u:

481

_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

481

_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

482

483

copy = dict(data1['copy'])

483

copy = dict(data1['copy'])

484

copy.update(data2['copy'])

484

copy.update(data2['copy'])

485

fullcopy = dict(data1['fullcopy'])

485

fullcopy = dict(data1['fullcopy'])

486

fullcopy.update(data2['fullcopy'])

486

fullcopy.update(data2['fullcopy'])

487

488

if dirtyc1:

488

if dirtyc1:

489

_combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,

489

_combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,

490

incompletediverge)

490

incompletediverge)

491

else:

491

else:

492

_combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,

492

_combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,

493

incompletediverge)

493

incompletediverge)

494

495

renamedelete = {}

495

renamedelete = {}

496

renamedeleteset = set()

496

renamedeleteset = set()

497

divergeset = set()

497

divergeset = set()

498

for of, fl in list(diverge.items()):

498

for of, fl in list(diverge.items()):

499

if len(fl) == 1 or of in c1 or of in c2:

499

if len(fl) == 1 or of in c1 or of in c2:

500

del diverge[of] # not actually divergent, or not a rename

500

del diverge[of] # not actually divergent, or not a rename

501

if of not in c1 and of not in c2:

501

if of not in c1 and of not in c2:

502

# renamed on one side, deleted on the other side, but filter

502

# renamed on one side, deleted on the other side, but filter

503

# out files that have been renamed and then deleted

503

# out files that have been renamed and then deleted

504

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

504

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

505

renamedeleteset.update(fl) # reverse map for below

505

renamedeleteset.update(fl) # reverse map for below

506

else:

506

else:

507

divergeset.update(fl) # reverse map for below

507

divergeset.update(fl) # reverse map for below

508

509

if bothnew:

509

if bothnew:

510

repo.ui.debug(" unmatched files new in both:\n %s\n"

510

repo.ui.debug(" unmatched files new in both:\n %s\n"

511

% "\n ".join(bothnew))

511

% "\n ".join(bothnew))

512

bothdiverge = {}

512

bothdiverge = {}

513

bothincompletediverge = {}

513

bothincompletediverge = {}

514

remainder = {}

514

remainder = {}

515

both1 = {'copy': {},

515

both1 = {'copy': {},

516

'fullcopy': {},

516

'fullcopy': {},

517

'incomplete': {},

517

'incomplete': {},

518

'diverge': bothdiverge,

518

'diverge': bothdiverge,

519

'incompletediverge': bothincompletediverge

519

'incompletediverge': bothincompletediverge

520

}

520

}

521

both2 = {'copy': {},

521

both2 = {'copy': {},

522

'fullcopy': {},

522

'fullcopy': {},

523

'incomplete': {},

523

'incomplete': {},

524

'diverge': bothdiverge,

524

'diverge': bothdiverge,

525

'incompletediverge': bothincompletediverge

525

'incompletediverge': bothincompletediverge

526

}

526

}

527

for f in bothnew:

527

for f in bothnew:

528

_checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)

528

_checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)

529

_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)

529

_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)

530

if dirtyc1:

530

if dirtyc1:

531

# incomplete copies may only be found on the "dirty" side for bothnew

531

# incomplete copies may only be found on the "dirty" side for bothnew

532

assert not both2['incomplete']

532

assert not both2['incomplete']

533

remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,

533

remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,

534

bothincompletediverge)

534

bothincompletediverge)

535

elif dirtyc2:

535

elif dirtyc2:

536

assert not both1['incomplete']

536

assert not both1['incomplete']

537

remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,

537

remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,

538

bothincompletediverge)

538

bothincompletediverge)

539

else:

539

else:

540

# incomplete copies and divergences can't happen outside grafts

540

# incomplete copies and divergences can't happen outside grafts

541

assert not both1['incomplete']

541

assert not both1['incomplete']

542

assert not both2['incomplete']

542

assert not both2['incomplete']

543

assert not bothincompletediverge

543

assert not bothincompletediverge

544

for f in remainder:

544

for f in remainder:

545

assert f not in bothdiverge

545

assert f not in bothdiverge

546

ic = remainder[f]

546

ic = remainder[f]

547

if ic[0] in (m1 if dirtyc1 else m2):

547

if ic[0] in (m1 if dirtyc1 else m2):

548

# backed-out rename on one side, but watch out for deleted files

548

# backed-out rename on one side, but watch out for deleted files

549

bothdiverge[f] = ic

549

bothdiverge[f] = ic

550

for of, fl in bothdiverge.items():

550

for of, fl in bothdiverge.items():

551

if len(fl) == 2 and fl[0] == fl[1]:

551

if len(fl) == 2 and fl[0] == fl[1]:

552

copy[fl[0]] = of # not actually divergent, just matching renames

552

copy[fl[0]] = of # not actually divergent, just matching renames

553

554

if fullcopy and repo.ui.debugflag:

554

if fullcopy and repo.ui.debugflag:

555

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

555

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

556

"% = renamed and deleted):\n")

556

"% = renamed and deleted):\n")

557

for f in sorted(fullcopy):

557

for f in sorted(fullcopy):

558

note = ""

558

note = ""

559

if f in copy:

559

if f in copy:

560

note += "*"

560

note += "*"

561

if f in divergeset:

561

if f in divergeset:

562

note += "!"

562

note += "!"

563

if f in renamedeleteset:

563

if f in renamedeleteset:

564

note += "%"

564

note += "%"

565

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

565

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

566

note))

566

note))

567

del divergeset

567

del divergeset

568

569

if not fullcopy:

569

if not fullcopy:

570

return copy, {}, diverge, renamedelete, {}

570

return copy, {}, diverge, renamedelete, {}

571

572

repo.ui.debug(" checking for directory renames\n")

572

repo.ui.debug(" checking for directory renames\n")

573

574

# generate a directory move map

574

# generate a directory move map

575

d1, d2 = c1.dirs(), c2.dirs()

575

d1, d2 = c1.dirs(), c2.dirs()

576

# Hack for adding '', which is not otherwise added, to d1 and d2

576

# Hack for adding '', which is not otherwise added, to d1 and d2

577

d1.addpath('/')

577

d1.addpath('/')

578

d2.addpath('/')

578

d2.addpath('/')

579

invalid = set()

579

invalid = set()

580

dirmove = {}

580

dirmove = {}

581

582

# examine each file copy for a potential directory move, which is

582

# examine each file copy for a potential directory move, which is

583

# when all the files in a directory are moved to a new directory

583

# when all the files in a directory are moved to a new directory

584

for dst, src in fullcopy.iteritems():

584

for dst, src in fullcopy.iteritems():

585

dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)

585

dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)

586

if dsrc in invalid:

586

if dsrc in invalid:

587

# already seen to be uninteresting

587

# already seen to be uninteresting

588

continue

588

continue

589

elif dsrc in d1 and ddst in d1:

589

elif dsrc in d1 and ddst in d1:

590

# directory wasn't entirely moved locally

590

# directory wasn't entirely moved locally

591

invalid.add(dsrc + "/")

591

invalid.add(dsrc + "/")

592

elif dsrc in d2 and ddst in d2:

592

elif dsrc in d2 and ddst in d2:

593

# directory wasn't entirely moved remotely

593

# directory wasn't entirely moved remotely

594

invalid.add(dsrc + "/")

594

invalid.add(dsrc + "/")

595

elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":

595

elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":

596

# files from the same directory moved to two different places

596

# files from the same directory moved to two different places

597

invalid.add(dsrc + "/")

597

invalid.add(dsrc + "/")

598

else:

598

else:

599

# looks good so far

599

# looks good so far

600

dirmove[dsrc + "/"] = ddst + "/"

600

dirmove[dsrc + "/"] = ddst + "/"

601

602

for i in invalid:

602

for i in invalid:

603

if i in dirmove:

603

if i in dirmove:

604

del dirmove[i]

604

del dirmove[i]

605

del d1, d2, invalid

605

del d1, d2, invalid

606

607

if not dirmove:

607

if not dirmove:

608

return copy, {}, diverge, renamedelete, {}

608

return copy, {}, diverge, renamedelete, {}

609

610

for d in dirmove:

610

for d in dirmove:

611

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

611

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

612

(d, dirmove[d]))

612

(d, dirmove[d]))

613

614

movewithdir = {}

614

movewithdir = {}

615

# check unaccounted nonoverlapping files against directory moves

615

# check unaccounted nonoverlapping files against directory moves

616

for f in u1r + u2r:

616

for f in u1r + u2r:

617

if f not in fullcopy:

617

if f not in fullcopy:

618

for d in dirmove:

618

for d in dirmove:

619

if f.startswith(d):

619

if f.startswith(d):

620

# new file added in a directory that was moved, move it

620

# new file added in a directory that was moved, move it

621

df = dirmove[d] + f[len(d):]

621

df = dirmove[d] + f[len(d):]

622

if df not in copy:

622

if df not in copy:

623

movewithdir[f] = df

623

movewithdir[f] = df

624

repo.ui.debug((" pending file src: '%s' -> "

624

repo.ui.debug((" pending file src: '%s' -> "

625

"dst: '%s'\n") % (f, df))

625

"dst: '%s'\n") % (f, df))

626

break

626

break

627

628

return copy, movewithdir, diverge, renamedelete, dirmove

628

return copy, movewithdir, diverge, renamedelete, dirmove

629

630

def _heuristicscopytracing(repo, c1, c2, base):

630

def _heuristicscopytracing(repo, c1, c2, base):

631

""" Fast copytracing using filename heuristics

631

""" Fast copytracing using filename heuristics

632

633

Assumes that moves or renames are of following two types:

633

Assumes that moves or renames are of following two types:

634

635

1) Inside a directory only (same directory name but different filenames)

635

1) Inside a directory only (same directory name but different filenames)

636

2) Move from one directory to another

636

2) Move from one directory to another

637

(same filenames but different directory names)

637

(same filenames but different directory names)

638

639

Works only when there are no merge commits in the "source branch".

639

Works only when there are no merge commits in the "source branch".

640

Source branch is commits from base up to c2 not including base.

640

Source branch is commits from base up to c2 not including base.

641

642

If merge is involved it fallbacks to _fullcopytracing().

642

If merge is involved it fallbacks to _fullcopytracing().

643

644

Can be used by setting the following config:

644

Can be used by setting the following config:

645

646

[experimental]

646

[experimental]

647

copytrace = heuristics

647

copytrace = heuristics

648

649

In some cases the copy/move candidates found by heuristics can be very large

649

In some cases the copy/move candidates found by heuristics can be very large

650

in number and that will make the algorithm slow. The number of possible

650

in number and that will make the algorithm slow. The number of possible

651

candidates to check can be limited by using the config

651

candidates to check can be limited by using the config

652

`experimental.copytrace.movecandidateslimit` which defaults to 100.

652

`experimental.copytrace.movecandidateslimit` which defaults to 100.

653

"""

653

"""

654

655

if c1.rev() is None:

655

if c1.rev() is None:

656

c1 = c1.p1()

656

c1 = c1.p1()

657

if c2.rev() is None:

657

if c2.rev() is None:

658

c2 = c2.p1()

658

c2 = c2.p1()

659

660

copies = {}

660

copies = {}

661

662

changedfiles = set()

662

changedfiles = set()

663

m1 = c1.manifest()

663

m1 = c1.manifest()

664

if not repo.revs('%d::%d', base.rev(), c2.rev()):

664

if not repo.revs('%d::%d', base.rev(), c2.rev()):

665

# If base is not in c2 branch, we switch to fullcopytracing

665

# If base is not in c2 branch, we switch to fullcopytracing

666

repo.ui.debug("switching to full copytracing as base is not "

666

repo.ui.debug("switching to full copytracing as base is not "

667

"an ancestor of c2\n")

667

"an ancestor of c2\n")

668

return _fullcopytracing(repo, c1, c2, base)

668

return _fullcopytracing(repo, c1, c2, base)

669

670

ctx = c2

670

ctx = c2

671

while ctx != base:

671

while ctx != base:

672

if len(ctx.parents()) == 2:

672

if len(ctx.parents()) == 2:

673

# To keep things simple let's not handle merges

673

# To keep things simple let's not handle merges

674

repo.ui.debug("switching to full copytracing because of merges\n")

674

repo.ui.debug("switching to full copytracing because of merges\n")

675

return _fullcopytracing(repo, c1, c2, base)

675

return _fullcopytracing(repo, c1, c2, base)

676

changedfiles.update(ctx.files())

676

changedfiles.update(ctx.files())

677

ctx = ctx.p1()

677

ctx = ctx.p1()

678

679

cp = _forwardcopies(base, c2)

679

cp = _forwardcopies(base, c2)

680

for dst, src in cp.iteritems():

680

for dst, src in cp.iteritems():

681

if src in m1:

681

if src in m1:

682

copies[dst] = src

682

copies[dst] = src

683

684

# file is missing if it isn't present in the destination, but is present in

684

# file is missing if it isn't present in the destination, but is present in

685

# the base and present in the source.

685

# the base and present in the source.

686

# Presence in the base is important to exclude added files, presence in the

686

# Presence in the base is important to exclude added files, presence in the

687

# source is important to exclude removed files.

687

# source is important to exclude removed files.

688

filt = lambda f: f not in m1 and f in base and f in c2

688

filt = lambda f: f not in m1 and f in base and f in c2

689

missingfiles = [f for f in changedfiles if filt(f)]

689

missingfiles = [f for f in changedfiles if filt(f)]

690

691

if missingfiles:

691

if missingfiles:

692

basenametofilename = collections.defaultdict(list)

692

basenametofilename = collections.defaultdict(list)

693

dirnametofilename = collections.defaultdict(list)

693

dirnametofilename = collections.defaultdict(list)

694

695

for f in m1.filesnotin(base.manifest()):

695

for f in m1.filesnotin(base.manifest()):

696

basename = os.path.basename(f)

696

basename = os.path.basename(f)

697

dirname = os.path.dirname(f)

697

dirname = os.path.dirname(f)

698

basenametofilename[basename].append(f)

698

basenametofilename[basename].append(f)

699

dirnametofilename[dirname].append(f)

699

dirnametofilename[dirname].append(f)

700

701

# in case of a rebase/graft, base may not be a common ancestor

702

anc = c1.ancestor(c2)

703

704

for f in missingfiles:

701

for f in missingfiles:

705

basename = os.path.basename(f)

702

basename = os.path.basename(f)

706

dirname = os.path.dirname(f)

703

dirname = os.path.dirname(f)

707

samebasename = basenametofilename[basename]

704

samebasename = basenametofilename[basename]

708

samedirname = dirnametofilename[dirname]

705

samedirname = dirnametofilename[dirname]

709

movecandidates = samebasename + samedirname

706

movecandidates = samebasename + samedirname

710

# f is guaranteed to be present in c2, that's why

707

# f is guaranteed to be present in c2, that's why

711

# c2.filectx(f) won't fail

708

# c2.filectx(f) won't fail

712

f2 = c2.filectx(f)

709

f2 = c2.filectx(f)

713

# we can have a lot of candidates which can slow down the heuristics

710

# we can have a lot of candidates which can slow down the heuristics

714

# config value to limit the number of candidates moves to check

711

# config value to limit the number of candidates moves to check

715

maxcandidates = repo.ui.configint('experimental',

712

maxcandidates = repo.ui.configint('experimental',

716

'copytrace.movecandidateslimit')

713

'copytrace.movecandidateslimit')

717

714

718

if len(movecandidates) > maxcandidates:

715

if len(movecandidates) > maxcandidates:

719

repo.ui.status(_("skipping copytracing for '%s', more "

716

repo.ui.status(_("skipping copytracing for '%s', more "

720

"candidates than the limit: %d\n")

717

"candidates than the limit: %d\n")

721

% (f, len(movecandidates)))

718

% (f, len(movecandidates)))

722

continue

719

continue

723

720

724

for candidate in movecandidates:

721

for candidate in movecandidates:

725

f1 = c1.filectx(candidate)

722

f1 = c1.filectx(candidate)

726

if _related(f1, f2, ~~anc~~.~~rev~~()):

723

if _related(f1, f2):

727

# if there are a few related copies then we'll merge

724

# if there are a few related copies then we'll merge

728

# changes into all of them. This matches the behaviour

725

# changes into all of them. This matches the behaviour

729

# of upstream copytracing

726

# of upstream copytracing

730

copies[candidate] = f

727

copies[candidate] = f

731

728

732

return copies, {}, {}, {}, {}

729

return copies, {}, {}, {}, {}

733

730

734

def _related(f1, f2, ~~limit~~):

731

def _related(f1, f2):

735

"""return True if f1 and f2 filectx have a common ancestor

732

"""return True if f1 and f2 filectx have a common ancestor

736

733

737

Walk back to common ancestor to see if the two files originate

734

Walk back to common ancestor to see if the two files originate

738

from the same file. Since workingfilectx's rev() is None it messes

735

from the same file. Since workingfilectx's rev() is None it messes

739

up the integer comparison logic, hence the pre-step check for

736

up the integer comparison logic, hence the pre-step check for

740

None (f1 and f2 can only be workingfilectx's initially).

737

None (f1 and f2 can only be workingfilectx's initially).

741

"""

738

"""

742

739

743

if f1 == f2:

740

if f1 == f2:

744

return f1 # a match

741

return f1 # a match

745

742

746

g1, g2 = f1.ancestors(), f2.ancestors()

743

g1, g2 = f1.ancestors(), f2.ancestors()

747

try:

744

try:

748

f1r, f2r = f1.linkrev(), f2.linkrev()

745

f1r, f2r = f1.linkrev(), f2.linkrev()

749

746

750

if f1r is None:

747

if f1r is None:

751

f1 = next(g1)

748

f1 = next(g1)

752

if f2r is None:

749

if f2r is None:

753

f2 = next(g2)

750

f2 = next(g2)

754

751

755

while True:

752

while True:

756

f1r, f2r = f1.linkrev(), f2.linkrev()

753

f1r, f2r = f1.linkrev(), f2.linkrev()

757

if f1r > f2r:

754

if f1r > f2r:

758

f1 = next(g1)

755

f1 = next(g1)

759

elif f2r > f1r:

756

elif f2r > f1r:

760

f2 = next(g2)

757

f2 = next(g2)

761

elif f1 == f2:

758

else: # f1 and f2 point to files in the same linkrev

762

return f1 # a match

759

return f1 == f2 # true if they point to the same file

763

elif f1r == f2r or f1r < limit or f2r < limit:

764

return False # copy no longer relevant

765

except StopIteration:

760

except StopIteration:

766

return False

761

return False

767

762

768

def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):

763

def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):

769

"""

764

"""

770

check possible copies of f from msrc to mdst

765

check possible copies of f from msrc to mdst

771

766

772

srcctx = starting context for f in msrc

767

srcctx = starting context for f in msrc

773

dstctx = destination context for f in mdst

768

dstctx = destination context for f in mdst

774

f = the filename to check (as in msrc)

769

f = the filename to check (as in msrc)

775

base = the changectx used as a merge base

770

base = the changectx used as a merge base

776

tca = topological common ancestor for graft-like scenarios

771

tca = topological common ancestor for graft-like scenarios

777

remotebase = True if base is outside tca::srcctx, False otherwise

772

remotebase = True if base is outside tca::srcctx, False otherwise

778

limit = the rev number to not search beyond

773

limit = the rev number to not search beyond

779

data = dictionary of dictionary to store copy data. (see mergecopies)

774

data = dictionary of dictionary to store copy data. (see mergecopies)

780

775

781

note: limit is only an optimization, and provides no guarantee that

776

note: limit is only an optimization, and provides no guarantee that

782

irrelevant revisions will not be visited

777

irrelevant revisions will not be visited

783

there is no easy way to make this algorithm stop in a guaranteed way

778

there is no easy way to make this algorithm stop in a guaranteed way

784

once it "goes behind a certain revision".

779

once it "goes behind a certain revision".

785

"""

780

"""

786

781

787

msrc = srcctx.manifest()

782

msrc = srcctx.manifest()

788

mdst = dstctx.manifest()

783

mdst = dstctx.manifest()

789

mb = base.manifest()

784

mb = base.manifest()

790

mta = tca.manifest()

785

mta = tca.manifest()

791

# Might be true if this call is about finding backward renames,

786

# Might be true if this call is about finding backward renames,

792

# This happens in the case of grafts because the DAG is then rotated.

787

# This happens in the case of grafts because the DAG is then rotated.

793

# If the file exists in both the base and the source, we are not looking

788

# If the file exists in both the base and the source, we are not looking

794

# for a rename on the source side, but on the part of the DAG that is

789

# for a rename on the source side, but on the part of the DAG that is

795

# traversed backwards.

790

# traversed backwards.

796

#

791

#

797

# In the case there is both backward and forward renames (before and after

792

# In the case there is both backward and forward renames (before and after

798

# the base) this is more complicated as we must detect a divergence.

793

# the base) this is more complicated as we must detect a divergence.

799

# We use 'backwards = False' in that case.

794

# We use 'backwards = False' in that case.

800

backwards = not remotebase and base != tca and f in mb

795

backwards = not remotebase and base != tca and f in mb

801

getsrcfctx = _makegetfctx(srcctx)

796

getsrcfctx = _makegetfctx(srcctx)

802

getdstfctx = _makegetfctx(dstctx)

797

getdstfctx = _makegetfctx(dstctx)

803

798

804

if msrc[f] == mb.get(f) and not remotebase:

799

if msrc[f] == mb.get(f) and not remotebase:

805

# Nothing to merge

800

# Nothing to merge

806

return

801

return

807

802

808

of = None

803

of = None

809

seen = {f}

804

seen = {f}

810

for oc in getsrcfctx(f, msrc[f]).ancestors():

805

for oc in getsrcfctx(f, msrc[f]).ancestors():

811

ocr = oc.linkrev()

806

ocr = oc.linkrev()

812

of = oc.path()

807

of = oc.path()

813

if of in seen:

808

if of in seen:

814

# check limit late - grab last rename before

809

# check limit late - grab last rename before

815

if ocr < limit:

810

if ocr < limit:

816

break

811

break

817

continue

812

continue

818

seen.add(of)

813

seen.add(of)

819

814

820

# remember for dir rename detection

815

# remember for dir rename detection

821

if backwards:

816

if backwards:

822

data['fullcopy'][of] = f # grafting backwards through renames

817

data['fullcopy'][of] = f # grafting backwards through renames

823

else:

818

else:

824

data['fullcopy'][f] = of

819

data['fullcopy'][f] = of

825

if of not in mdst:

820

if of not in mdst:

826

continue # no match, keep looking

821

continue # no match, keep looking

827

if mdst[of] == mb.get(of):

822

if mdst[of] == mb.get(of):

828

return # no merge needed, quit early

823

return # no merge needed, quit early

829

c2 = getdstfctx(of, mdst[of])

824

c2 = getdstfctx(of, mdst[of])

830

# c2 might be a plain new file on added on destination side that is

825

# c2 might be a plain new file on added on destination side that is

831

# unrelated to the droids we are looking for.

826

# unrelated to the droids we are looking for.

832

cr = _related(oc, c2, ~~tca~~.~~rev~~())

827

cr = _related(oc, c2)

833

if cr and (of == f or of == c2.path()): # non-divergent

828

if cr and (of == f or of == c2.path()): # non-divergent

834

if backwards:

829

if backwards:

835

data['copy'][of] = f

830

data['copy'][of] = f

836

elif of in mb:

831

elif of in mb:

837

data['copy'][f] = of

832

data['copy'][f] = of

838

elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename

833

elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename

839

data['copy'][of] = f

834

data['copy'][of] = f

840

del data['fullcopy'][f]

835

del data['fullcopy'][f]

841

data['fullcopy'][of] = f

836

data['fullcopy'][of] = f

842

else: # divergence w.r.t. graft CA on one side of topological CA

837

else: # divergence w.r.t. graft CA on one side of topological CA

843

for sf in seen:

838

for sf in seen:

844

if sf in mb:

839

if sf in mb:

845

assert sf not in data['diverge']

840

assert sf not in data['diverge']

846

data['diverge'][sf] = [f, of]

841

data['diverge'][sf] = [f, of]

847

break

842

break

848

return

843

return

849

844

850

if of in mta:

845

if of in mta:

851

if backwards or remotebase:

846

if backwards or remotebase:

852

data['incomplete'][of] = f

847

data['incomplete'][of] = f

853

else:

848

else:

854

for sf in seen:

849

for sf in seen:

855

if sf in mb:

850

if sf in mb:

856

if tca == base:

851

if tca == base:

857

data['diverge'].setdefault(sf, []).append(f)

852

data['diverge'].setdefault(sf, []).append(f)

858

else:

853

else:

859

data['incompletediverge'][sf] = [of, f]

854

data['incompletediverge'][sf] = [of, f]

860

return

855

return

861

856

862

def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):

857

def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):

863

"""reproduce copies from fromrev to rev in the dirstate

858

"""reproduce copies from fromrev to rev in the dirstate

864

859

865

If skiprev is specified, it's a revision that should be used to

860

If skiprev is specified, it's a revision that should be used to

866

filter copy records. Any copies that occur between fromrev and

861

filter copy records. Any copies that occur between fromrev and

867

skiprev will not be duplicated, even if they appear in the set of

862

skiprev will not be duplicated, even if they appear in the set of

868

copies between fromrev and rev.

863

copies between fromrev and rev.

869

"""

864

"""

870

exclude = {}

865

exclude = {}

871

if (skiprev is not None and

866

if (skiprev is not None and

872

repo.ui.config('experimental', 'copytrace') != 'off'):

867

repo.ui.config('experimental', 'copytrace') != 'off'):

873

# copytrace='off' skips this line, but not the entire function because

868

# copytrace='off' skips this line, but not the entire function because

874

# the line below is O(size of the repo) during a rebase, while the rest

869

# the line below is O(size of the repo) during a rebase, while the rest

875

# of the function is much faster (and is required for carrying copy

870

# of the function is much faster (and is required for carrying copy

876

# metadata across the rebase anyway).

871

# metadata across the rebase anyway).

877

exclude = pathcopies(repo[fromrev], repo[skiprev])

872

exclude = pathcopies(repo[fromrev], repo[skiprev])

878

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

873

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

879

# copies.pathcopies returns backward renames, so dst might not

874

# copies.pathcopies returns backward renames, so dst might not

880

# actually be in the dirstate

875

# actually be in the dirstate

881

if dst in exclude:

876

if dst in exclude:

882

continue

877

continue

883

wctx[dst].markcopied(src)

878

wctx[dst].markcopied(src)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import collections
             import heapq
             import os
             from .i18n import _
             from . import (
                 match as matchmod,
                 node,
                 pathutil,
                 scmutil,
                 util,
             )
             def _findlimit(repo, a, b):
                 """
                 Find the last revision that needs to be checked to ensure that a full
                 transitive closure for file copies can be properly calculated.
                 Generally, this means finding the earliest revision number that's an
                 ancestor of a or b but not both, except when a or b is a direct descendent
                 of the other, in which case we can return the minimum revnum of a and b.
                 None if no such revision exists.
                 """
                 # basic idea:
                 # - mark a and b with different sides
                 # - if a parent's children are all on the same side, the parent is
                 #   on that side, otherwise it is on no side
                 # - walk the graph in topological order with the help of a heap;
                 #   - add unseen parents to side map
                 #   - clear side of any parent that has children on different sides
                 #   - track number of interesting revs that might still be on a side
                 #   - track the lowest interesting rev seen
                 #   - quit when interesting revs is zero
                 cl = repo.changelog
                 working = len(cl) # pseudo rev for the working directory
                 if a is None:
                     a = working
                 if b is None:
                     b = working
                 side = {a: -1, b: 1}
                 visit = [-a, -b]
                 heapq.heapify(visit)
                 interesting = len(visit)
                 hascommonancestor = False
                 limit = working
                 while interesting:
                     r = -heapq.heappop(visit)
                     if r == working:
                         parents = [cl.rev(p) for p in repo.dirstate.parents()]
                     else:
                         parents = cl.parentrevs(r)
                     for p in parents:
                         if p < 0:
                             continue
                         if p not in side:
                             # first time we see p; add it to visit
                             side[p] = side[r]
                             if side[p]:
                                 interesting += 1
                             heapq.heappush(visit, -p)
                         elif side[p] and side[p] != side[r]:
                             # p was interesting but now we know better
                             side[p] = 0
                             interesting -= 1
                             hascommonancestor = True
                     if side[r]:
                         limit = r # lowest rev visited
                         interesting -= 1
                 if not hascommonancestor:
                     return None
                 # Consider the following flow (see test-commit-amend.t under issue4405):
                 # 1/ File 'a0' committed
                 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
                 # 3/ Move back to first commit
                 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
                 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
                 #
                 # During the amend in step five, we will be in this state:
                 #
                 # @  3 temporary amend commit for a1-amend
                 # |
                 # o  2 a1-amend
                 # |
                 # | o  1 a1
                 # |/
                 # o  0 a0
                 #
                 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
                 # yet the filelog has the copy information in rev 1 and we will not look
                 # back far enough unless we also look at the a and b as candidates.
                 # This only occurs when a is a descendent of b or visa-versa.
                 return min(limit, a, b)
             def _chain(src, dst, a, b):
                 """chain two sets of copies a->b"""
                 t = a.copy()
                 for k, v in b.iteritems():
                     if v in t:
                         # found a chain
                         if t[v] != k:
                             # file wasn't renamed back to itself
                             t[k] = t[v]
                         if v not in dst:
                             # chain was a rename, not a copy
                             del t[v]
                     if v in src:
                         # file is a copy of an existing file
                         t[k] = v
                 # remove criss-crossed copies
                 for k, v in list(t.items()):
                     if k in src and v in dst:
                         del t[k]
                 return t
             def _tracefile(fctx, am, limit=-1):
                 """return file context that is the ancestor of fctx present in ancestor
                 manifest am, stopping after the first ancestor lower than limit"""
                 for f in fctx.ancestors():
                     if am.get(f.path(), None) == f.filenode():
                         return f
                     if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
                         return None
             def _dirstatecopies(d, match=None):
                 ds = d._repo.dirstate
                 c = ds.copies().copy()
                 for k in list(c):
                     if ds[k] not in 'anm' or (match and not match(k)):
                         del c[k]
                 return c
             def _computeforwardmissing(a, b, match=None):
                 """Computes which files are in b but not a.
                 This is its own function so extensions can easily wrap this call to see what
                 files _forwardcopies is about to process.
                 """
                 ma = a.manifest()
                 mb = b.manifest()
                 return mb.filesnotin(ma, match=match)
             def _committedforwardcopies(a, b, match):
                 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 limit = _findlimit(a._repo, a.rev(), b.rev())
                 if limit is None:
                     limit = -1
                 am = a.manifest()
                 # find where new files came from
                 # we currently don't try to find where old files went, too expensive
                 # this means we can miss a case like 'hg rm b; hg cp a b'
                 cm = {}
                 # Computing the forward missing is quite expensive on large manifests, since
                 # it compares the entire manifests. We can optimize it in the common use
                 # case of computing what copies are in a commit versus its parent (like
                 # during a rebase or histedit). Note, we exclude merge commits from this
                 # optimization, since the ctx.files() for a merge commit is not correct for
                 # this comparison.
                 forwardmissingmatch = match
                 if b.p1() == a and b.p2().node() == node.nullid:
                     filesmatcher = scmutil.matchfiles(a._repo, b.files())
                     forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
                 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
                 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
                 for f in missing:
                     fctx = b[f]
                     fctx._ancestrycontext = ancestrycontext
                     ofctx = _tracefile(fctx, am, limit)
                     if ofctx:
                         cm[f] = ofctx.path()
                 return cm
             def _forwardcopies(a, b, match=None):
                 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
                 # check for working copy
                 if b.rev() is None:
                     if a == b.p1():
                         # short-circuit to avoid issues with merge states
                         return _dirstatecopies(b, match)
                     cm = _committedforwardcopies(a, b.p1(), match)
                     # combine copies from dirstate if necessary
                     return _chain(a, b, cm, _dirstatecopies(b, match))
                 return _committedforwardcopies(a, b, match)
             def _backwardrenames(a, b):
                 if a._repo.ui.config('experimental', 'copytrace') == 'off':
                     return {}
                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
                 f = _forwardcopies(b, a)
                 r = {}
                 for k, v in sorted(f.iteritems()):
                     # remove copies
                     if v in a:
                         continue
                     r[v] = k
                 return r
             def pathcopies(x, y, match=None):
                 """find {dst@y: src@x} copy mapping for directed compare"""
                 if x == y or not x or not y:
                     return {}
                 a = y.ancestor(x)
                 if a == x:
                     return _forwardcopies(x, y, match=match)
                 if a == y:
                     return _backwardrenames(x, y)
                 return _chain(x, y, _backwardrenames(x, a),
                               _forwardcopies(a, y, match=match))
             def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
                 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
                 and c2. This is its own function so extensions can easily wrap this call
                 to see what files mergecopies is about to process.
                 Even though c1 and c2 are not used in this function, they are useful in
                 other extensions for being able to read the file nodes of the changed files.
                 "baselabel" can be passed to help distinguish the multiple computations
                 done in the graft case.
                 """
                 u1 = sorted(addedinm1 - addedinm2)
                 u2 = sorted(addedinm2 - addedinm1)
                 header = "  unmatched files in %s"
                 if baselabel:
                     header += ' (from %s)' % baselabel
                 if u1:
                     repo.ui.debug("%s:\n   %s\n" % (header % 'local', "\n   ".join(u1)))
                 if u2:
                     repo.ui.debug("%s:\n   %s\n" % (header % 'other', "\n   ".join(u2)))
                 return u1, u2
             def _makegetfctx(ctx):
                 """return a 'getfctx' function suitable for _checkcopies usage
                 We have to re-setup the function building 'filectx' for each
                 '_checkcopies' to ensure the linkrev adjustment is properly setup for
                 each. Linkrev adjustment is important to avoid bug in rename
                 detection. Moreover, having a proper '_ancestrycontext' setup ensures
                 the performance impact of this adjustment is kept limited. Without it,
                 each file could do a full dag traversal making the time complexity of
                 the operation explode (see issue4537).
                 This function exists here mostly to limit the impact on stable. Feel
                 free to refactor on default.
                 """
                 rev = ctx.rev()
                 repo = ctx._repo
                 ac = getattr(ctx, '_ancestrycontext', None)
                 if ac is None:
                     revs = [rev]
                     if rev is None:
                         revs = [p.rev() for p in ctx.parents()]
                     ac = repo.changelog.ancestors(revs, inclusive=True)
                     ctx._ancestrycontext = ac
                 def makectx(f, n):
                     if n in node.wdirnodes:  # in a working context?
                         if ctx.rev() is None:
                             return ctx.filectx(f)
                         return repo[None][f]
                     fctx = repo.filectx(f, fileid=n)
                     # setup only needed for filectx not create from a changectx
                     fctx._ancestrycontext = ac
                     fctx._descendantrev = rev
                     return fctx
                 return util.lrucachefunc(makectx)
             def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
                 """combine partial copy paths"""
                 remainder = {}
                 for f in copyfrom:
                     if f in copyto:
                         finalcopy[copyto[f]] = copyfrom[f]
                         del copyto[f]
                 for f in incompletediverge:
                     assert f not in diverge
                     ic = incompletediverge[f]
                     if ic[0] in copyto:
                         diverge[f] = [copyto[ic[0]], ic[1]]
                     else:
                         remainder[f] = ic
                 return remainder
             def mergecopies(repo, c1, c2, base):
                 """
                 The function calling different copytracing algorithms on the basis of config
                 which find moves and copies between context c1 and c2 that are relevant for
                 merging. 'base' will be used as the merge base.
                 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
                 files that were moved/ copied in one merge parent and modified in another.
                 For example:
                 o          ---> 4 another commit
                 |
                 |   o      ---> 3 commit that modifies a.txt
                 |  /
                 o /        ---> 2 commit that moves a.txt to b.txt
                 |/
                 o          ---> 1 merge base
                 If we try to rebase revision 3 on revision 4, since there is no a.txt in
                 revision 4, and if user have copytrace disabled, we prints the following
                 message:
                 ```other changed <file> which local deleted```
                 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
                 "dirmove".
                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.
                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.
                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.
                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.
                 "dirmove" is a mapping of detected source dir -> destination dir renames.
                 This is needed for handling changes to new files previously grafted into
                 renamed directories.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return {}, {}, {}, {}, {}
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return repo.dirstate.copies(), {}, {}, {}, {}
                 copytracing = repo.ui.config('experimental', 'copytrace')
                 # Copy trace disabling is explicitly below the node == p1 logic above
                 # because the logic above is required for a simple copy to be kept across a
                 # rebase.
                 if copytracing == 'off':
                     return {}, {}, {}, {}, {}
                 elif copytracing == 'heuristics':
                     # Do full copytracing if only non-public revisions are involved as
                     # that will be fast enough and will also cover the copies which could
                     # be missed by heuristics
                     if _isfullcopytraceable(repo, c1, base):
                         return _fullcopytracing(repo, c1, c2, base)
                     return _heuristicscopytracing(repo, c1, c2, base)
                 else:
                     return _fullcopytracing(repo, c1, c2, base)
             def _isfullcopytraceable(repo, c1, base):
                 """ Checks that if base, source and destination are all no-public branches,
                 if yes let's use the full copytrace algorithm for increased capabilities
                 since it will be fast enough.
                 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
                 number of changesets from c1 to base such that if number of changesets are
                 more than the limit, full copytracing algorithm won't be used.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c1.mutable() and base.mutable():
                     sourcecommitlimit = repo.ui.configint('experimental',
                                                           'copytrace.sourcecommitlimit')
                     commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
                     return commits < sourcecommitlimit
                 return False
             def _fullcopytracing(repo, c1, c2, base):
                 """ The full copytracing algorithm which finds all the new files that were
                 added from merge base up to the top commit and for each file it checks if
                 this file was copied from another file.
                 This is pretty slow when a lot of changesets are involved but will track all
                 the copies.
                 """
                 # In certain scenarios (e.g. graft, update or rebase), base can be
                 # overridden We still need to know a real common ancestor in this case We
                 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
                 # can be multiple common ancestors, e.g. in case of bidmerge.  Because our
                 # caller may not know if the revision passed in lieu of the CA is a genuine
                 # common ancestor or not without explicitly checking it, it's better to
                 # determine that here.
                 #
                 # base.descendant(wc) and base.descendant(base) are False, work around that
                 _c1 = c1.p1() if c1.rev() is None else c1
                 _c2 = c2.p1() if c2.rev() is None else c2
                 # an endpoint is "dirty" if it isn't a descendant of the merge base
                 # if we have a dirty endpoint, we need to trigger graft logic, and also
                 # keep track of which endpoint is dirty
                 dirtyc1 = not (base == _c1 or base.descendant(_c1))
                 dirtyc2 = not (base == _c2 or base.descendant(_c2))
                 graft = dirtyc1 or dirtyc2
                 tca = base
                 if graft:
                     tca = _c1.ancestor(_c2)
                 limit = _findlimit(repo, c1.rev(), c2.rev())
                 if limit is None:
                     # no common ancestor, no copies
                     return {}, {}, {}, {}, {}
                 repo.ui.debug("  searching for copies back to rev %d\n" % limit)
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 mb = base.manifest()
                 # gather data from _checkcopies:
                 # - diverge = record all diverges in this dict
                 # - copy = record all non-divergent copies in this dict
                 # - fullcopy = record all copies in this dict
                 # - incomplete = record non-divergent partial copies here
                 # - incompletediverge = record divergent partial copies here
                 diverge = {} # divergence data is shared
                 incompletediverge  = {}
                 data1 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': diverge,
                          'incompletediverge': incompletediverge,
                         }
                 data2 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': diverge,
                          'incompletediverge': incompletediverge,
                         }
                 # find interesting file sets from manifests
                 addedinm1 = m1.filesnotin(mb)
                 addedinm2 = m2.filesnotin(mb)
                 bothnew = sorted(addedinm1 & addedinm2)
                 if tca == base:
                     # unmatched file from base
                     u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
                     u1u, u2u = u1r, u2r
                 else:
                     # unmatched file from base (DAG rotation in the graft case)
                     u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                                   baselabel='base')
                     # unmatched file from topological common ancestors (no DAG rotation)
                     # need to recompute this for directory move handling when grafting
                     mta = tca.manifest()
                     u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
                                                                 m2.filesnotin(mta),
                                                   baselabel='topological common ancestor')
                 for f in u1u:
                     _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
                 for f in u2u:
                     _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
                 copy = dict(data1['copy'])
                 copy.update(data2['copy'])
                 fullcopy = dict(data1['fullcopy'])
                 fullcopy.update(data2['fullcopy'])
                 if dirtyc1:
                     _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                                    incompletediverge)
                 else:
                     _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                                    incompletediverge)
                 renamedelete = {}
                 renamedeleteset = set()
                 divergeset = set()
                 for of, fl in list(diverge.items()):
                     if len(fl) == 1 or of in c1 or of in c2:
                         del diverge[of] # not actually divergent, or not a rename
                         if of not in c1 and of not in c2:
                             # renamed on one side, deleted on the other side, but filter
                             # out files that have been renamed and then deleted
                             renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                             renamedeleteset.update(fl) # reverse map for below
                     else:
                         divergeset.update(fl) # reverse map for below
                 if bothnew:
                     repo.ui.debug("  unmatched files new in both:\n   %s\n"
                                   % "\n   ".join(bothnew))
                 bothdiverge = {}
                 bothincompletediverge = {}
                 remainder = {}
                 both1 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': bothdiverge,
                          'incompletediverge': bothincompletediverge
                         }
                 both2 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': bothdiverge,
                          'incompletediverge': bothincompletediverge
                         }
                 for f in bothnew:
                     _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
                     _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
                 if dirtyc1:
                     # incomplete copies may only be found on the "dirty" side for bothnew
                     assert not both2['incomplete']
                     remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                                bothincompletediverge)
                 elif dirtyc2:
                     assert not both1['incomplete']
                     remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                                bothincompletediverge)
                 else:
                     # incomplete copies and divergences can't happen outside grafts
                     assert not both1['incomplete']
                     assert not both2['incomplete']
                     assert not bothincompletediverge
                 for f in remainder:
                     assert f not in bothdiverge
                     ic = remainder[f]
                     if ic[0] in (m1 if dirtyc1 else m2):
                         # backed-out rename on one side, but watch out for deleted files
                         bothdiverge[f] = ic
                 for of, fl in bothdiverge.items():
                     if len(fl) == 2 and fl[0] == fl[1]:
                         copy[fl[0]] = of # not actually divergent, just matching renames
                 if fullcopy and repo.ui.debugflag:
                     repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                                   "% = renamed and deleted):\n")
                     for f in sorted(fullcopy):
                         note = ""
                         if f in copy:
                             note += "*"
                         if f in divergeset:
                             note += "!"
                         if f in renamedeleteset:
                             note += "%"
                         repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                                           note))
                 del divergeset
                 if not fullcopy:
                     return copy, {}, diverge, renamedelete, {}
                 repo.ui.debug("  checking for directory renames\n")
                 # generate a directory move map
                 d1, d2 = c1.dirs(), c2.dirs()
                 # Hack for adding '', which is not otherwise added, to d1 and d2
                 d1.addpath('/')
                 d2.addpath('/')
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in fullcopy.iteritems():
                     dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif dsrc in d1 and ddst in d1:
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc + "/")
                     elif dsrc in d2 and ddst in d2:
                         # directory wasn't entirely moved remotely
                         invalid.add(dsrc + "/")
                     elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
                         # files from the same directory moved to two different places
                         invalid.add(dsrc + "/")
                     else:
                         # looks good so far
                         dirmove[dsrc + "/"] = ddst + "/"
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del d1, d2, invalid
                 if not dirmove:
                     return copy, {}, diverge, renamedelete, {}
                 for d in dirmove:
                     repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                                   (d, dirmove[d]))
                 movewithdir = {}
                 # check unaccounted nonoverlapping files against directory moves
                 for f in u1r + u2r:
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d):]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(("   pending file src: '%s' -> "
                                                    "dst: '%s'\n") % (f, df))
                                 break
                 return copy, movewithdir, diverge, renamedelete, dirmove
             def _heuristicscopytracing(repo, c1, c2, base):
                 """ Fast copytracing using filename heuristics
                 Assumes that moves or renames are of following two types:
 ) Inside a directory only (same directory name but different filenames)
 ) Move from one directory to another
                                 (same filenames but different directory names)
                 Works only when there are no merge commits in the "source branch".
                 Source branch is commits from base up to c2 not including base.
                 If merge is involved it fallbacks to _fullcopytracing().
                 Can be used by setting the following config:
                     [experimental]
                     copytrace = heuristics
                 In some cases the copy/move candidates found by heuristics can be very large
                 in number and that will make the algorithm slow. The number of possible
                 candidates to check can be limited by using the config
                 `experimental.copytrace.movecandidateslimit` which defaults to 100.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c2.rev() is None:
                     c2 = c2.p1()
                 copies = {}
                 changedfiles = set()
                 m1 = c1.manifest()
                 if not repo.revs('%d::%d', base.rev(), c2.rev()):
                     # If base is not in c2 branch, we switch to fullcopytracing
                     repo.ui.debug("switching to full copytracing as base is not "
                                   "an ancestor of c2\n")
                     return _fullcopytracing(repo, c1, c2, base)
                 ctx = c2
                 while ctx != base:
                     if len(ctx.parents()) == 2:
                         # To keep things simple let's not handle merges
                         repo.ui.debug("switching to full copytracing because of merges\n")
                         return _fullcopytracing(repo, c1, c2, base)
                     changedfiles.update(ctx.files())
                     ctx = ctx.p1()
                 cp = _forwardcopies(base, c2)
                 for dst, src in cp.iteritems():
                     if src in m1:
                         copies[dst] = src
                 # file is missing if it isn't present in the destination, but is present in
                 # the base and present in the source.
                 # Presence in the base is important to exclude added files, presence in the
                 # source is important to exclude removed files.
                 filt = lambda f: f not in m1 and f in base and f in c2
                 missingfiles = [f for f in changedfiles if filt(f)]
                 if missingfiles:
                     basenametofilename = collections.defaultdict(list)
                     dirnametofilename = collections.defaultdict(list)
                     for f in m1.filesnotin(base.manifest()):
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         basenametofilename[basename].append(f)
                         dirnametofilename[dirname].append(f)
-                    # in case of a rebase/graft, base may not be a common ancestor
-                    anc = c1.ancestor(c2)
                     for f in missingfiles:
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         samebasename = basenametofilename[basename]
                         samedirname = dirnametofilename[dirname]
                         movecandidates = samebasename + samedirname
                         # f is guaranteed to be present in c2, that's why
                         # c2.filectx(f) won't fail
                         f2 = c2.filectx(f)
                         # we can have a lot of candidates which can slow down the heuristics
                         # config value to limit the number of candidates moves to check
                         maxcandidates = repo.ui.configint('experimental',
                                                           'copytrace.movecandidateslimit')
                         if len(movecandidates) > maxcandidates:
                             repo.ui.status(_("skipping copytracing for '%s', more "
                                              "candidates than the limit: %d\n")
                                            % (f, len(movecandidates)))
                             continue
                         for candidate in movecandidates:
                             f1 = c1.filectx(candidate)
-                            if _related(f1, f2, anc.rev()):
+                            if _related(f1, f2):
                                 # if there are a few related copies then we'll merge
                                 # changes into all of them. This matches the behaviour
                                 # of upstream copytracing
                                 copies[candidate] = f
                 return copies, {}, {}, {}, {}
-            def _related(f1, f2, limit):
+            def _related(f1, f2):
                 """return True if f1 and f2 filectx have a common ancestor
                 Walk back to common ancestor to see if the two files originate
                 from the same file. Since workingfilectx's rev() is None it messes
                 up the integer comparison logic, hence the pre-step check for
                 None (f1 and f2 can only be workingfilectx's initially).
                 """
                 if f1 == f2:
                     return f1 # a match
                 g1, g2 = f1.ancestors(), f2.ancestors()
                 try:
                     f1r, f2r = f1.linkrev(), f2.linkrev()
                     if f1r is None:
                         f1 = next(g1)
                     if f2r is None:
                         f2 = next(g2)
                     while True:
                         f1r, f2r = f1.linkrev(), f2.linkrev()
                         if f1r > f2r:
                             f1 = next(g1)
                         elif f2r > f1r:
                             f2 = next(g2)
-                        elif f1 == f2:
+                        else: # f1 and f2 point to files in the same linkrev
-                            return f1 # a match
+                            return f1 == f2 # true if they point to the same file
-                        elif f1r == f2r or f1r < limit or f2r < limit:
-                            return False # copy no longer relevant
                 except StopIteration:
                     return False
             def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
                 """
                 check possible copies of f from msrc to mdst
                 srcctx = starting context for f in msrc
                 dstctx = destination context for f in mdst
                 f = the filename to check (as in msrc)
                 base = the changectx used as a merge base
                 tca = topological common ancestor for graft-like scenarios
                 remotebase = True if base is outside tca::srcctx, False otherwise
                 limit = the rev number to not search beyond
                 data = dictionary of dictionary to store copy data. (see mergecopies)
                 note: limit is only an optimization, and provides no guarantee that
                 irrelevant revisions will not be visited
                 there is no easy way to make this algorithm stop in a guaranteed way
                 once it "goes behind a certain revision".
                 """
                 msrc = srcctx.manifest()
                 mdst = dstctx.manifest()
                 mb = base.manifest()
                 mta = tca.manifest()
                 # Might be true if this call is about finding backward renames,
                 # This happens in the case of grafts because the DAG is then rotated.
                 # If the file exists in both the base and the source, we are not looking
                 # for a rename on the source side, but on the part of the DAG that is
                 # traversed backwards.
                 #
                 # In the case there is both backward and forward renames (before and after
                 # the base) this is more complicated as we must detect a divergence.
                 # We use 'backwards = False' in that case.
                 backwards = not remotebase and base != tca and f in mb
                 getsrcfctx = _makegetfctx(srcctx)
                 getdstfctx = _makegetfctx(dstctx)
                 if msrc[f] == mb.get(f) and not remotebase:
                     # Nothing to merge
                     return
                 of = None
                 seen = {f}
                 for oc in getsrcfctx(f, msrc[f]).ancestors():
                     ocr = oc.linkrev()
                     of = oc.path()
                     if of in seen:
                         # check limit late - grab last rename before
                         if ocr < limit:
                             break
                         continue
                     seen.add(of)
                     # remember for dir rename detection
                     if backwards:
                         data['fullcopy'][of] = f # grafting backwards through renames
                     else:
                         data['fullcopy'][f] = of
                     if of not in mdst:
                         continue # no match, keep looking
                     if mdst[of] == mb.get(of):
                         return # no merge needed, quit early
                     c2 = getdstfctx(of, mdst[of])
                     # c2 might be a plain new file on added on destination side that is
                     # unrelated to the droids we are looking for.
-                    cr = _related(oc, c2, tca.rev())
+                    cr = _related(oc, c2)
                     if cr and (of == f or of == c2.path()): # non-divergent
                         if backwards:
                             data['copy'][of] = f
                         elif of in mb:
                             data['copy'][f] = of
                         elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                             data['copy'][of] = f
                             del data['fullcopy'][f]
                             data['fullcopy'][of] = f
                         else: # divergence w.r.t. graft CA on one side of topological CA
                             for sf in seen:
                                 if sf in mb:
                                     assert sf not in data['diverge']
                                     data['diverge'][sf] = [f, of]
                                     break
                         return
                 if of in mta:
                     if backwards or remotebase:
                         data['incomplete'][of] = f
                     else:
                         for sf in seen:
                             if sf in mb:
                                 if tca == base:
                                     data['diverge'].setdefault(sf, []).append(f)
                                 else:
                                     data['incompletediverge'][sf] = [of, f]
                                 return
             def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
                 """reproduce copies from fromrev to rev in the dirstate
                 If skiprev is specified, it's a revision that should be used to
                 filter copy records. Any copies that occur between fromrev and
                 skiprev will not be duplicated, even if they appear in the set of
                 copies between fromrev and rev.
                 """
                 exclude = {}
                 if (skiprev is not None and
                     repo.ui.config('experimental', 'copytrace') != 'off'):
                     # copytrace='off' skips this line, but not the entire function because
                     # the line below is O(size of the repo) during a rebase, while the rest
                     # of the function is much faster (and is required for carrying copy
                     # metadata across the rebase anyway).
                     exclude = pathcopies(repo[fromrev], repo[skiprev])
                 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
                     # copies.pathcopies returns backward renames, so dst might not
                     # actually be in the dirstate
                     if dst in exclude:
                         continue
                     wctx[dst].markcopied(src)