##// END OF EJS Templates
copies: always respect matcher arg to _forwardcopies()...
Martin von Zweigbergk -
r35421:7ddc1e96 default
parent child Browse files
Show More
@@ -1,884 +1,884
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 scmutil,
20 scmutil,
21 util,
21 util,
22 )
22 )
23
23
24 def _findlimit(repo, a, b):
24 def _findlimit(repo, a, b):
25 """
25 """
26 Find the last revision that needs to be checked to ensure that a full
26 Find the last revision that needs to be checked to ensure that a full
27 transitive closure for file copies can be properly calculated.
27 transitive closure for file copies can be properly calculated.
28 Generally, this means finding the earliest revision number that's an
28 Generally, this means finding the earliest revision number that's an
29 ancestor of a or b but not both, except when a or b is a direct descendant
29 ancestor of a or b but not both, except when a or b is a direct descendant
30 of the other, in which case we can return the minimum revnum of a and b.
30 of the other, in which case we can return the minimum revnum of a and b.
31 None if no such revision exists.
31 None if no such revision exists.
32 """
32 """
33
33
34 # basic idea:
34 # basic idea:
35 # - mark a and b with different sides
35 # - mark a and b with different sides
36 # - if a parent's children are all on the same side, the parent is
36 # - if a parent's children are all on the same side, the parent is
37 # on that side, otherwise it is on no side
37 # on that side, otherwise it is on no side
38 # - walk the graph in topological order with the help of a heap;
38 # - walk the graph in topological order with the help of a heap;
39 # - add unseen parents to side map
39 # - add unseen parents to side map
40 # - clear side of any parent that has children on different sides
40 # - clear side of any parent that has children on different sides
41 # - track number of interesting revs that might still be on a side
41 # - track number of interesting revs that might still be on a side
42 # - track the lowest interesting rev seen
42 # - track the lowest interesting rev seen
43 # - quit when interesting revs is zero
43 # - quit when interesting revs is zero
44
44
45 cl = repo.changelog
45 cl = repo.changelog
46 working = len(cl) # pseudo rev for the working directory
46 working = len(cl) # pseudo rev for the working directory
47 if a is None:
47 if a is None:
48 a = working
48 a = working
49 if b is None:
49 if b is None:
50 b = working
50 b = working
51
51
52 side = {a: -1, b: 1}
52 side = {a: -1, b: 1}
53 visit = [-a, -b]
53 visit = [-a, -b]
54 heapq.heapify(visit)
54 heapq.heapify(visit)
55 interesting = len(visit)
55 interesting = len(visit)
56 hascommonancestor = False
56 hascommonancestor = False
57 limit = working
57 limit = working
58
58
59 while interesting:
59 while interesting:
60 r = -heapq.heappop(visit)
60 r = -heapq.heappop(visit)
61 if r == working:
61 if r == working:
62 parents = [cl.rev(p) for p in repo.dirstate.parents()]
62 parents = [cl.rev(p) for p in repo.dirstate.parents()]
63 else:
63 else:
64 parents = cl.parentrevs(r)
64 parents = cl.parentrevs(r)
65 for p in parents:
65 for p in parents:
66 if p < 0:
66 if p < 0:
67 continue
67 continue
68 if p not in side:
68 if p not in side:
69 # first time we see p; add it to visit
69 # first time we see p; add it to visit
70 side[p] = side[r]
70 side[p] = side[r]
71 if side[p]:
71 if side[p]:
72 interesting += 1
72 interesting += 1
73 heapq.heappush(visit, -p)
73 heapq.heappush(visit, -p)
74 elif side[p] and side[p] != side[r]:
74 elif side[p] and side[p] != side[r]:
75 # p was interesting but now we know better
75 # p was interesting but now we know better
76 side[p] = 0
76 side[p] = 0
77 interesting -= 1
77 interesting -= 1
78 hascommonancestor = True
78 hascommonancestor = True
79 if side[r]:
79 if side[r]:
80 limit = r # lowest rev visited
80 limit = r # lowest rev visited
81 interesting -= 1
81 interesting -= 1
82
82
83 if not hascommonancestor:
83 if not hascommonancestor:
84 return None
84 return None
85
85
86 # Consider the following flow (see test-commit-amend.t under issue4405):
86 # Consider the following flow (see test-commit-amend.t under issue4405):
87 # 1/ File 'a0' committed
87 # 1/ File 'a0' committed
88 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
88 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
89 # 3/ Move back to first commit
89 # 3/ Move back to first commit
90 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
90 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
91 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
91 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
92 #
92 #
93 # During the amend in step five, we will be in this state:
93 # During the amend in step five, we will be in this state:
94 #
94 #
95 # @ 3 temporary amend commit for a1-amend
95 # @ 3 temporary amend commit for a1-amend
96 # |
96 # |
97 # o 2 a1-amend
97 # o 2 a1-amend
98 # |
98 # |
99 # | o 1 a1
99 # | o 1 a1
100 # |/
100 # |/
101 # o 0 a0
101 # o 0 a0
102 #
102 #
103 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
103 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
104 # yet the filelog has the copy information in rev 1 and we will not look
104 # yet the filelog has the copy information in rev 1 and we will not look
105 # back far enough unless we also look at the a and b as candidates.
105 # back far enough unless we also look at the a and b as candidates.
106 # This only occurs when a is a descendant of b or vice versa.
106 # This only occurs when a is a descendant of b or vice versa.
107 return min(limit, a, b)
107 return min(limit, a, b)
108
108
109 def _chain(src, dst, a, b):
109 def _chain(src, dst, a, b):
110 '''chain two sets of copies a->b'''
110 '''chain two sets of copies a->b'''
111 t = a.copy()
111 t = a.copy()
112 for k, v in b.iteritems():
112 for k, v in b.iteritems():
113 if v in t:
113 if v in t:
114 # found a chain
114 # found a chain
115 if t[v] != k:
115 if t[v] != k:
116 # file wasn't renamed back to itself
116 # file wasn't renamed back to itself
117 t[k] = t[v]
117 t[k] = t[v]
118 if v not in dst:
118 if v not in dst:
119 # chain was a rename, not a copy
119 # chain was a rename, not a copy
120 del t[v]
120 del t[v]
121 if v in src:
121 if v in src:
122 # file is a copy of an existing file
122 # file is a copy of an existing file
123 t[k] = v
123 t[k] = v
124
124
125 # remove criss-crossed copies
125 # remove criss-crossed copies
126 for k, v in t.items():
126 for k, v in t.items():
127 if k in src and v in dst:
127 if k in src and v in dst:
128 del t[k]
128 del t[k]
129
129
130 return t
130 return t
131
131
132 def _tracefile(fctx, am, limit=-1):
132 def _tracefile(fctx, am, limit=-1):
133 '''return file context that is the ancestor of fctx present in ancestor
133 '''return file context that is the ancestor of fctx present in ancestor
134 manifest am, stopping after the first ancestor lower than limit'''
134 manifest am, stopping after the first ancestor lower than limit'''
135
135
136 for f in fctx.ancestors():
136 for f in fctx.ancestors():
137 if am.get(f.path(), None) == f.filenode():
137 if am.get(f.path(), None) == f.filenode():
138 return f
138 return f
139 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
139 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
140 return None
140 return None
141
141
142 def _dirstatecopies(d):
142 def _dirstatecopies(d, match=None):
143 ds = d._repo.dirstate
143 ds = d._repo.dirstate
144 c = ds.copies().copy()
144 c = ds.copies().copy()
145 for k in list(c):
145 for k in list(c):
146 if ds[k] not in 'anm':
146 if ds[k] not in 'anm' or (match and not match(k)):
147 del c[k]
147 del c[k]
148 return c
148 return c
149
149
150 def _computeforwardmissing(a, b, match=None):
150 def _computeforwardmissing(a, b, match=None):
151 """Computes which files are in b but not a.
151 """Computes which files are in b but not a.
152 This is its own function so extensions can easily wrap this call to see what
152 This is its own function so extensions can easily wrap this call to see what
153 files _forwardcopies is about to process.
153 files _forwardcopies is about to process.
154 """
154 """
155 ma = a.manifest()
155 ma = a.manifest()
156 mb = b.manifest()
156 mb = b.manifest()
157 return mb.filesnotin(ma, match=match)
157 return mb.filesnotin(ma, match=match)
158
158
159 def _forwardcopies(a, b, match=None):
159 def _forwardcopies(a, b, match=None):
160 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
160 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
161
161
162 # check for working copy
162 # check for working copy
163 w = None
163 w = None
164 if b.rev() is None:
164 if b.rev() is None:
165 w = b
165 w = b
166 b = w.p1()
166 b = w.p1()
167 if a == b:
167 if a == b:
168 # short-circuit to avoid issues with merge states
168 # short-circuit to avoid issues with merge states
169 return _dirstatecopies(w)
169 return _dirstatecopies(w, match)
170
170
171 # files might have to be traced back to the fctx parent of the last
171 # files might have to be traced back to the fctx parent of the last
172 # one-side-only changeset, but not further back than that
172 # one-side-only changeset, but not further back than that
173 limit = _findlimit(a._repo, a.rev(), b.rev())
173 limit = _findlimit(a._repo, a.rev(), b.rev())
174 if limit is None:
174 if limit is None:
175 limit = -1
175 limit = -1
176 am = a.manifest()
176 am = a.manifest()
177
177
178 # find where new files came from
178 # find where new files came from
179 # we currently don't try to find where old files went, too expensive
179 # we currently don't try to find where old files went, too expensive
180 # this means we can miss a case like 'hg rm b; hg cp a b'
180 # this means we can miss a case like 'hg rm b; hg cp a b'
181 cm = {}
181 cm = {}
182
182
183 # Computing the forward missing is quite expensive on large manifests, since
183 # Computing the forward missing is quite expensive on large manifests, since
184 # it compares the entire manifests. We can optimize it in the common use
184 # it compares the entire manifests. We can optimize it in the common use
185 # case of computing what copies are in a commit versus its parent (like
185 # case of computing what copies are in a commit versus its parent (like
186 # during a rebase or histedit). Note, we exclude merge commits from this
186 # during a rebase or histedit). Note, we exclude merge commits from this
187 # optimization, since the ctx.files() for a merge commit is not correct for
187 # optimization, since the ctx.files() for a merge commit is not correct for
188 # this comparison.
188 # this comparison.
189 forwardmissingmatch = match
189 forwardmissingmatch = match
190 if b.p1() == a and b.p2().node() == node.nullid:
190 if b.p1() == a and b.p2().node() == node.nullid:
191 filesmatcher = scmutil.matchfiles(a._repo, b.files())
191 filesmatcher = scmutil.matchfiles(a._repo, b.files())
192 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
192 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
193 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
193 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
194
194
195 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
195 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
196 for f in missing:
196 for f in missing:
197 fctx = b[f]
197 fctx = b[f]
198 fctx._ancestrycontext = ancestrycontext
198 fctx._ancestrycontext = ancestrycontext
199 ofctx = _tracefile(fctx, am, limit)
199 ofctx = _tracefile(fctx, am, limit)
200 if ofctx:
200 if ofctx:
201 cm[f] = ofctx.path()
201 cm[f] = ofctx.path()
202
202
203 # combine copies from dirstate if necessary
203 # combine copies from dirstate if necessary
204 if w is not None:
204 if w is not None:
205 cm = _chain(a, w, cm, _dirstatecopies(w))
205 cm = _chain(a, w, cm, _dirstatecopies(w, match))
206
206
207 return cm
207 return cm
208
208
209 def _backwardrenames(a, b):
209 def _backwardrenames(a, b):
210 if a._repo.ui.config('experimental', 'copytrace') == 'off':
210 if a._repo.ui.config('experimental', 'copytrace') == 'off':
211 return {}
211 return {}
212
212
213 # Even though we're not taking copies into account, 1:n rename situations
213 # Even though we're not taking copies into account, 1:n rename situations
214 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
214 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
215 # arbitrarily pick one of the renames.
215 # arbitrarily pick one of the renames.
216 f = _forwardcopies(b, a)
216 f = _forwardcopies(b, a)
217 r = {}
217 r = {}
218 for k, v in sorted(f.iteritems()):
218 for k, v in sorted(f.iteritems()):
219 # remove copies
219 # remove copies
220 if v in a:
220 if v in a:
221 continue
221 continue
222 r[v] = k
222 r[v] = k
223 return r
223 return r
224
224
225 def pathcopies(x, y, match=None):
225 def pathcopies(x, y, match=None):
226 '''find {dst@y: src@x} copy mapping for directed compare'''
226 '''find {dst@y: src@x} copy mapping for directed compare'''
227 if x == y or not x or not y:
227 if x == y or not x or not y:
228 return {}
228 return {}
229 a = y.ancestor(x)
229 a = y.ancestor(x)
230 if a == x:
230 if a == x:
231 return _forwardcopies(x, y, match=match)
231 return _forwardcopies(x, y, match=match)
232 if a == y:
232 if a == y:
233 return _backwardrenames(x, y)
233 return _backwardrenames(x, y)
234 return _chain(x, y, _backwardrenames(x, a),
234 return _chain(x, y, _backwardrenames(x, a),
235 _forwardcopies(a, y, match=match))
235 _forwardcopies(a, y, match=match))
236
236
237 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
237 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
238 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
238 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
239 and c2. This is its own function so extensions can easily wrap this call
239 and c2. This is its own function so extensions can easily wrap this call
240 to see what files mergecopies is about to process.
240 to see what files mergecopies is about to process.
241
241
242 Even though c1 and c2 are not used in this function, they are useful in
242 Even though c1 and c2 are not used in this function, they are useful in
243 other extensions for being able to read the file nodes of the changed files.
243 other extensions for being able to read the file nodes of the changed files.
244
244
245 "baselabel" can be passed to help distinguish the multiple computations
245 "baselabel" can be passed to help distinguish the multiple computations
246 done in the graft case.
246 done in the graft case.
247 """
247 """
248 u1 = sorted(addedinm1 - addedinm2)
248 u1 = sorted(addedinm1 - addedinm2)
249 u2 = sorted(addedinm2 - addedinm1)
249 u2 = sorted(addedinm2 - addedinm1)
250
250
251 header = " unmatched files in %s"
251 header = " unmatched files in %s"
252 if baselabel:
252 if baselabel:
253 header += ' (from %s)' % baselabel
253 header += ' (from %s)' % baselabel
254 if u1:
254 if u1:
255 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
255 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
256 if u2:
256 if u2:
257 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
257 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
258 return u1, u2
258 return u1, u2
259
259
260 def _makegetfctx(ctx):
260 def _makegetfctx(ctx):
261 """return a 'getfctx' function suitable for _checkcopies usage
261 """return a 'getfctx' function suitable for _checkcopies usage
262
262
263 We have to re-setup the function building 'filectx' for each
263 We have to re-setup the function building 'filectx' for each
264 '_checkcopies' to ensure the linkrev adjustment is properly setup for
264 '_checkcopies' to ensure the linkrev adjustment is properly setup for
265 each. Linkrev adjustment is important to avoid bug in rename
265 each. Linkrev adjustment is important to avoid bug in rename
266 detection. Moreover, having a proper '_ancestrycontext' setup ensures
266 detection. Moreover, having a proper '_ancestrycontext' setup ensures
267 the performance impact of this adjustment is kept limited. Without it,
267 the performance impact of this adjustment is kept limited. Without it,
268 each file could do a full dag traversal making the time complexity of
268 each file could do a full dag traversal making the time complexity of
269 the operation explode (see issue4537).
269 the operation explode (see issue4537).
270
270
271 This function exists here mostly to limit the impact on stable. Feel
271 This function exists here mostly to limit the impact on stable. Feel
272 free to refactor on default.
272 free to refactor on default.
273 """
273 """
274 rev = ctx.rev()
274 rev = ctx.rev()
275 repo = ctx._repo
275 repo = ctx._repo
276 ac = getattr(ctx, '_ancestrycontext', None)
276 ac = getattr(ctx, '_ancestrycontext', None)
277 if ac is None:
277 if ac is None:
278 revs = [rev]
278 revs = [rev]
279 if rev is None:
279 if rev is None:
280 revs = [p.rev() for p in ctx.parents()]
280 revs = [p.rev() for p in ctx.parents()]
281 ac = repo.changelog.ancestors(revs, inclusive=True)
281 ac = repo.changelog.ancestors(revs, inclusive=True)
282 ctx._ancestrycontext = ac
282 ctx._ancestrycontext = ac
283 def makectx(f, n):
283 def makectx(f, n):
284 if n in node.wdirnodes: # in a working context?
284 if n in node.wdirnodes: # in a working context?
285 if ctx.rev() is None:
285 if ctx.rev() is None:
286 return ctx.filectx(f)
286 return ctx.filectx(f)
287 return repo[None][f]
287 return repo[None][f]
288 fctx = repo.filectx(f, fileid=n)
288 fctx = repo.filectx(f, fileid=n)
289 # setup only needed for filectx not create from a changectx
289 # setup only needed for filectx not create from a changectx
290 fctx._ancestrycontext = ac
290 fctx._ancestrycontext = ac
291 fctx._descendantrev = rev
291 fctx._descendantrev = rev
292 return fctx
292 return fctx
293 return util.lrucachefunc(makectx)
293 return util.lrucachefunc(makectx)
294
294
295 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
295 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
296 """combine partial copy paths"""
296 """combine partial copy paths"""
297 remainder = {}
297 remainder = {}
298 for f in copyfrom:
298 for f in copyfrom:
299 if f in copyto:
299 if f in copyto:
300 finalcopy[copyto[f]] = copyfrom[f]
300 finalcopy[copyto[f]] = copyfrom[f]
301 del copyto[f]
301 del copyto[f]
302 for f in incompletediverge:
302 for f in incompletediverge:
303 assert f not in diverge
303 assert f not in diverge
304 ic = incompletediverge[f]
304 ic = incompletediverge[f]
305 if ic[0] in copyto:
305 if ic[0] in copyto:
306 diverge[f] = [copyto[ic[0]], ic[1]]
306 diverge[f] = [copyto[ic[0]], ic[1]]
307 else:
307 else:
308 remainder[f] = ic
308 remainder[f] = ic
309 return remainder
309 return remainder
310
310
311 def mergecopies(repo, c1, c2, base):
311 def mergecopies(repo, c1, c2, base):
312 """
312 """
313 The function calling different copytracing algorithms on the basis of config
313 The function calling different copytracing algorithms on the basis of config
314 which find moves and copies between context c1 and c2 that are relevant for
314 which find moves and copies between context c1 and c2 that are relevant for
315 merging. 'base' will be used as the merge base.
315 merging. 'base' will be used as the merge base.
316
316
317 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
317 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
318 files that were moved/ copied in one merge parent and modified in another.
318 files that were moved/ copied in one merge parent and modified in another.
319 For example:
319 For example:
320
320
321 o ---> 4 another commit
321 o ---> 4 another commit
322 |
322 |
323 | o ---> 3 commit that modifies a.txt
323 | o ---> 3 commit that modifies a.txt
324 | /
324 | /
325 o / ---> 2 commit that moves a.txt to b.txt
325 o / ---> 2 commit that moves a.txt to b.txt
326 |/
326 |/
327 o ---> 1 merge base
327 o ---> 1 merge base
328
328
329 If we try to rebase revision 3 on revision 4, since there is no a.txt in
329 If we try to rebase revision 3 on revision 4, since there is no a.txt in
330 revision 4, and if the user has copytrace disabled, we print the following
330 revision 4, and if the user has copytrace disabled, we print the following
331 message:
331 message:
332
332
333 ```other changed <file> which local deleted```
333 ```other changed <file> which local deleted```
334
334
335 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
335 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
336 "dirmove".
336 "dirmove".
337
337
338 "copy" is a mapping from destination name -> source name,
338 "copy" is a mapping from destination name -> source name,
339 where source is in c1 and destination is in c2 or vice-versa.
339 where source is in c1 and destination is in c2 or vice-versa.
340
340
341 "movewithdir" is a mapping from source name -> destination name,
341 "movewithdir" is a mapping from source name -> destination name,
342 where the file at source present in one context but not the other
342 where the file at source present in one context but not the other
343 needs to be moved to destination by the merge process, because the
343 needs to be moved to destination by the merge process, because the
344 other context moved the directory it is in.
344 other context moved the directory it is in.
345
345
346 "diverge" is a mapping of source name -> list of destination names
346 "diverge" is a mapping of source name -> list of destination names
347 for divergent renames.
347 for divergent renames.
348
348
349 "renamedelete" is a mapping of source name -> list of destination
349 "renamedelete" is a mapping of source name -> list of destination
350 names for files deleted in c1 that were renamed in c2 or vice-versa.
350 names for files deleted in c1 that were renamed in c2 or vice-versa.
351
351
352 "dirmove" is a mapping of detected source dir -> destination dir renames.
352 "dirmove" is a mapping of detected source dir -> destination dir renames.
353 This is needed for handling changes to new files previously grafted into
353 This is needed for handling changes to new files previously grafted into
354 renamed directories.
354 renamed directories.
355 """
355 """
356 # avoid silly behavior for update from empty dir
356 # avoid silly behavior for update from empty dir
357 if not c1 or not c2 or c1 == c2:
357 if not c1 or not c2 or c1 == c2:
358 return {}, {}, {}, {}, {}
358 return {}, {}, {}, {}, {}
359
359
360 # avoid silly behavior for parent -> working dir
360 # avoid silly behavior for parent -> working dir
361 if c2.node() is None and c1.node() == repo.dirstate.p1():
361 if c2.node() is None and c1.node() == repo.dirstate.p1():
362 return repo.dirstate.copies(), {}, {}, {}, {}
362 return repo.dirstate.copies(), {}, {}, {}, {}
363
363
364 copytracing = repo.ui.config('experimental', 'copytrace')
364 copytracing = repo.ui.config('experimental', 'copytrace')
365
365
366 # Copy trace disabling is explicitly below the node == p1 logic above
366 # Copy trace disabling is explicitly below the node == p1 logic above
367 # because the logic above is required for a simple copy to be kept across a
367 # because the logic above is required for a simple copy to be kept across a
368 # rebase.
368 # rebase.
369 if copytracing == 'off':
369 if copytracing == 'off':
370 return {}, {}, {}, {}, {}
370 return {}, {}, {}, {}, {}
371 elif copytracing == 'heuristics':
371 elif copytracing == 'heuristics':
372 # Do full copytracing if only non-public revisions are involved as
372 # Do full copytracing if only non-public revisions are involved as
373 # that will be fast enough and will also cover the copies which could
373 # that will be fast enough and will also cover the copies which could
374 # be missed by heuristics
374 # be missed by heuristics
375 if _isfullcopytraceable(repo, c1, base):
375 if _isfullcopytraceable(repo, c1, base):
376 return _fullcopytracing(repo, c1, c2, base)
376 return _fullcopytracing(repo, c1, c2, base)
377 return _heuristicscopytracing(repo, c1, c2, base)
377 return _heuristicscopytracing(repo, c1, c2, base)
378 else:
378 else:
379 return _fullcopytracing(repo, c1, c2, base)
379 return _fullcopytracing(repo, c1, c2, base)
380
380
381 def _isfullcopytraceable(repo, c1, base):
381 def _isfullcopytraceable(repo, c1, base):
382 """ Checks that if base, source and destination are all non-public branches,
382 """ Checks that if base, source and destination are all non-public branches,
383 if yes let's use the full copytrace algorithm for increased capabilities
383 if yes let's use the full copytrace algorithm for increased capabilities
384 since it will be fast enough.
384 since it will be fast enough.
385
385
386 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
386 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
387 number of changesets from c1 to base such that if number of changesets are
387 number of changesets from c1 to base such that if number of changesets are
388 more than the limit, full copytracing algorithm won't be used.
388 more than the limit, full copytracing algorithm won't be used.
389 """
389 """
390 if c1.rev() is None:
390 if c1.rev() is None:
391 c1 = c1.p1()
391 c1 = c1.p1()
392 if c1.mutable() and base.mutable():
392 if c1.mutable() and base.mutable():
393 sourcecommitlimit = repo.ui.configint('experimental',
393 sourcecommitlimit = repo.ui.configint('experimental',
394 'copytrace.sourcecommitlimit')
394 'copytrace.sourcecommitlimit')
395 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
395 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
396 return commits < sourcecommitlimit
396 return commits < sourcecommitlimit
397 return False
397 return False
398
398
399 def _fullcopytracing(repo, c1, c2, base):
399 def _fullcopytracing(repo, c1, c2, base):
400 """ The full copytracing algorithm which finds all the new files that were
400 """ The full copytracing algorithm which finds all the new files that were
401 added from merge base up to the top commit and for each file it checks if
401 added from merge base up to the top commit and for each file it checks if
402 this file was copied from another file.
402 this file was copied from another file.
403
403
404 This is pretty slow when a lot of changesets are involved but will track all
404 This is pretty slow when a lot of changesets are involved but will track all
405 the copies.
405 the copies.
406 """
406 """
407 # In certain scenarios (e.g. graft, update or rebase), base can be
407 # In certain scenarios (e.g. graft, update or rebase), base can be
408 # overridden. We still need to know a real common ancestor in this case. We
408 # overridden. We still need to know a real common ancestor in this case. We
409 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
409 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
410 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
410 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
411 # caller may not know if the revision passed in lieu of the CA is a genuine
411 # caller may not know if the revision passed in lieu of the CA is a genuine
412 # common ancestor or not without explicitly checking it, it's better to
412 # common ancestor or not without explicitly checking it, it's better to
413 # determine that here.
413 # determine that here.
414 #
414 #
415 # base.descendant(wc) and base.descendant(base) are False, work around that
415 # base.descendant(wc) and base.descendant(base) are False, work around that
416 _c1 = c1.p1() if c1.rev() is None else c1
416 _c1 = c1.p1() if c1.rev() is None else c1
417 _c2 = c2.p1() if c2.rev() is None else c2
417 _c2 = c2.p1() if c2.rev() is None else c2
418 # an endpoint is "dirty" if it isn't a descendant of the merge base
418 # an endpoint is "dirty" if it isn't a descendant of the merge base
419 # if we have a dirty endpoint, we need to trigger graft logic, and also
419 # if we have a dirty endpoint, we need to trigger graft logic, and also
420 # keep track of which endpoint is dirty
420 # keep track of which endpoint is dirty
421 dirtyc1 = not (base == _c1 or base.descendant(_c1))
421 dirtyc1 = not (base == _c1 or base.descendant(_c1))
422 dirtyc2 = not (base == _c2 or base.descendant(_c2))
422 dirtyc2 = not (base == _c2 or base.descendant(_c2))
423 graft = dirtyc1 or dirtyc2
423 graft = dirtyc1 or dirtyc2
424 tca = base
424 tca = base
425 if graft:
425 if graft:
426 tca = _c1.ancestor(_c2)
426 tca = _c1.ancestor(_c2)
427
427
428 limit = _findlimit(repo, c1.rev(), c2.rev())
428 limit = _findlimit(repo, c1.rev(), c2.rev())
429 if limit is None:
429 if limit is None:
430 # no common ancestor, no copies
430 # no common ancestor, no copies
431 return {}, {}, {}, {}, {}
431 return {}, {}, {}, {}, {}
432 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
432 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
433
433
434 m1 = c1.manifest()
434 m1 = c1.manifest()
435 m2 = c2.manifest()
435 m2 = c2.manifest()
436 mb = base.manifest()
436 mb = base.manifest()
437
437
438 # gather data from _checkcopies:
438 # gather data from _checkcopies:
439 # - diverge = record all diverges in this dict
439 # - diverge = record all diverges in this dict
440 # - copy = record all non-divergent copies in this dict
440 # - copy = record all non-divergent copies in this dict
441 # - fullcopy = record all copies in this dict
441 # - fullcopy = record all copies in this dict
442 # - incomplete = record non-divergent partial copies here
442 # - incomplete = record non-divergent partial copies here
443 # - incompletediverge = record divergent partial copies here
443 # - incompletediverge = record divergent partial copies here
444 diverge = {} # divergence data is shared
444 diverge = {} # divergence data is shared
445 incompletediverge = {}
445 incompletediverge = {}
446 data1 = {'copy': {},
446 data1 = {'copy': {},
447 'fullcopy': {},
447 'fullcopy': {},
448 'incomplete': {},
448 'incomplete': {},
449 'diverge': diverge,
449 'diverge': diverge,
450 'incompletediverge': incompletediverge,
450 'incompletediverge': incompletediverge,
451 }
451 }
452 data2 = {'copy': {},
452 data2 = {'copy': {},
453 'fullcopy': {},
453 'fullcopy': {},
454 'incomplete': {},
454 'incomplete': {},
455 'diverge': diverge,
455 'diverge': diverge,
456 'incompletediverge': incompletediverge,
456 'incompletediverge': incompletediverge,
457 }
457 }
458
458
459 # find interesting file sets from manifests
459 # find interesting file sets from manifests
460 addedinm1 = m1.filesnotin(mb)
460 addedinm1 = m1.filesnotin(mb)
461 addedinm2 = m2.filesnotin(mb)
461 addedinm2 = m2.filesnotin(mb)
462 bothnew = sorted(addedinm1 & addedinm2)
462 bothnew = sorted(addedinm1 & addedinm2)
463 if tca == base:
463 if tca == base:
464 # unmatched file from base
464 # unmatched file from base
465 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
465 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
466 u1u, u2u = u1r, u2r
466 u1u, u2u = u1r, u2r
467 else:
467 else:
468 # unmatched file from base (DAG rotation in the graft case)
468 # unmatched file from base (DAG rotation in the graft case)
469 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
469 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
470 baselabel='base')
470 baselabel='base')
471 # unmatched file from topological common ancestors (no DAG rotation)
471 # unmatched file from topological common ancestors (no DAG rotation)
472 # need to recompute this for directory move handling when grafting
472 # need to recompute this for directory move handling when grafting
473 mta = tca.manifest()
473 mta = tca.manifest()
474 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
474 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
475 m2.filesnotin(mta),
475 m2.filesnotin(mta),
476 baselabel='topological common ancestor')
476 baselabel='topological common ancestor')
477
477
478 for f in u1u:
478 for f in u1u:
479 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
479 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
480
480
481 for f in u2u:
481 for f in u2u:
482 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
482 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
483
483
484 copy = dict(data1['copy'])
484 copy = dict(data1['copy'])
485 copy.update(data2['copy'])
485 copy.update(data2['copy'])
486 fullcopy = dict(data1['fullcopy'])
486 fullcopy = dict(data1['fullcopy'])
487 fullcopy.update(data2['fullcopy'])
487 fullcopy.update(data2['fullcopy'])
488
488
489 if dirtyc1:
489 if dirtyc1:
490 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
490 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
491 incompletediverge)
491 incompletediverge)
492 else:
492 else:
493 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
493 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
494 incompletediverge)
494 incompletediverge)
495
495
496 renamedelete = {}
496 renamedelete = {}
497 renamedeleteset = set()
497 renamedeleteset = set()
498 divergeset = set()
498 divergeset = set()
499 for of, fl in list(diverge.items()):
499 for of, fl in list(diverge.items()):
500 if len(fl) == 1 or of in c1 or of in c2:
500 if len(fl) == 1 or of in c1 or of in c2:
501 del diverge[of] # not actually divergent, or not a rename
501 del diverge[of] # not actually divergent, or not a rename
502 if of not in c1 and of not in c2:
502 if of not in c1 and of not in c2:
503 # renamed on one side, deleted on the other side, but filter
503 # renamed on one side, deleted on the other side, but filter
504 # out files that have been renamed and then deleted
504 # out files that have been renamed and then deleted
505 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
505 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
506 renamedeleteset.update(fl) # reverse map for below
506 renamedeleteset.update(fl) # reverse map for below
507 else:
507 else:
508 divergeset.update(fl) # reverse map for below
508 divergeset.update(fl) # reverse map for below
509
509
510 if bothnew:
510 if bothnew:
511 repo.ui.debug(" unmatched files new in both:\n %s\n"
511 repo.ui.debug(" unmatched files new in both:\n %s\n"
512 % "\n ".join(bothnew))
512 % "\n ".join(bothnew))
513 bothdiverge = {}
513 bothdiverge = {}
514 bothincompletediverge = {}
514 bothincompletediverge = {}
515 remainder = {}
515 remainder = {}
516 both1 = {'copy': {},
516 both1 = {'copy': {},
517 'fullcopy': {},
517 'fullcopy': {},
518 'incomplete': {},
518 'incomplete': {},
519 'diverge': bothdiverge,
519 'diverge': bothdiverge,
520 'incompletediverge': bothincompletediverge
520 'incompletediverge': bothincompletediverge
521 }
521 }
522 both2 = {'copy': {},
522 both2 = {'copy': {},
523 'fullcopy': {},
523 'fullcopy': {},
524 'incomplete': {},
524 'incomplete': {},
525 'diverge': bothdiverge,
525 'diverge': bothdiverge,
526 'incompletediverge': bothincompletediverge
526 'incompletediverge': bothincompletediverge
527 }
527 }
528 for f in bothnew:
528 for f in bothnew:
529 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
529 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
530 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
530 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
531 if dirtyc1:
531 if dirtyc1:
532 # incomplete copies may only be found on the "dirty" side for bothnew
532 # incomplete copies may only be found on the "dirty" side for bothnew
533 assert not both2['incomplete']
533 assert not both2['incomplete']
534 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
534 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
535 bothincompletediverge)
535 bothincompletediverge)
536 elif dirtyc2:
536 elif dirtyc2:
537 assert not both1['incomplete']
537 assert not both1['incomplete']
538 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
538 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
539 bothincompletediverge)
539 bothincompletediverge)
540 else:
540 else:
541 # incomplete copies and divergences can't happen outside grafts
541 # incomplete copies and divergences can't happen outside grafts
542 assert not both1['incomplete']
542 assert not both1['incomplete']
543 assert not both2['incomplete']
543 assert not both2['incomplete']
544 assert not bothincompletediverge
544 assert not bothincompletediverge
545 for f in remainder:
545 for f in remainder:
546 assert f not in bothdiverge
546 assert f not in bothdiverge
547 ic = remainder[f]
547 ic = remainder[f]
548 if ic[0] in (m1 if dirtyc1 else m2):
548 if ic[0] in (m1 if dirtyc1 else m2):
549 # backed-out rename on one side, but watch out for deleted files
549 # backed-out rename on one side, but watch out for deleted files
550 bothdiverge[f] = ic
550 bothdiverge[f] = ic
551 for of, fl in bothdiverge.items():
551 for of, fl in bothdiverge.items():
552 if len(fl) == 2 and fl[0] == fl[1]:
552 if len(fl) == 2 and fl[0] == fl[1]:
553 copy[fl[0]] = of # not actually divergent, just matching renames
553 copy[fl[0]] = of # not actually divergent, just matching renames
554
554
555 if fullcopy and repo.ui.debugflag:
555 if fullcopy and repo.ui.debugflag:
556 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
556 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
557 "% = renamed and deleted):\n")
557 "% = renamed and deleted):\n")
558 for f in sorted(fullcopy):
558 for f in sorted(fullcopy):
559 note = ""
559 note = ""
560 if f in copy:
560 if f in copy:
561 note += "*"
561 note += "*"
562 if f in divergeset:
562 if f in divergeset:
563 note += "!"
563 note += "!"
564 if f in renamedeleteset:
564 if f in renamedeleteset:
565 note += "%"
565 note += "%"
566 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
566 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
567 note))
567 note))
568 del divergeset
568 del divergeset
569
569
570 if not fullcopy:
570 if not fullcopy:
571 return copy, {}, diverge, renamedelete, {}
571 return copy, {}, diverge, renamedelete, {}
572
572
573 repo.ui.debug(" checking for directory renames\n")
573 repo.ui.debug(" checking for directory renames\n")
574
574
575 # generate a directory move map
575 # generate a directory move map
576 d1, d2 = c1.dirs(), c2.dirs()
576 d1, d2 = c1.dirs(), c2.dirs()
577 # Hack for adding '', which is not otherwise added, to d1 and d2
577 # Hack for adding '', which is not otherwise added, to d1 and d2
578 d1.addpath('/')
578 d1.addpath('/')
579 d2.addpath('/')
579 d2.addpath('/')
580 invalid = set()
580 invalid = set()
581 dirmove = {}
581 dirmove = {}
582
582
583 # examine each file copy for a potential directory move, which is
583 # examine each file copy for a potential directory move, which is
584 # when all the files in a directory are moved to a new directory
584 # when all the files in a directory are moved to a new directory
585 for dst, src in fullcopy.iteritems():
585 for dst, src in fullcopy.iteritems():
586 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
586 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
587 if dsrc in invalid:
587 if dsrc in invalid:
588 # already seen to be uninteresting
588 # already seen to be uninteresting
589 continue
589 continue
590 elif dsrc in d1 and ddst in d1:
590 elif dsrc in d1 and ddst in d1:
591 # directory wasn't entirely moved locally
591 # directory wasn't entirely moved locally
592 invalid.add(dsrc + "/")
592 invalid.add(dsrc + "/")
593 elif dsrc in d2 and ddst in d2:
593 elif dsrc in d2 and ddst in d2:
594 # directory wasn't entirely moved remotely
594 # directory wasn't entirely moved remotely
595 invalid.add(dsrc + "/")
595 invalid.add(dsrc + "/")
596 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
596 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
597 # files from the same directory moved to two different places
597 # files from the same directory moved to two different places
598 invalid.add(dsrc + "/")
598 invalid.add(dsrc + "/")
599 else:
599 else:
600 # looks good so far
600 # looks good so far
601 dirmove[dsrc + "/"] = ddst + "/"
601 dirmove[dsrc + "/"] = ddst + "/"
602
602
603 for i in invalid:
603 for i in invalid:
604 if i in dirmove:
604 if i in dirmove:
605 del dirmove[i]
605 del dirmove[i]
606 del d1, d2, invalid
606 del d1, d2, invalid
607
607
608 if not dirmove:
608 if not dirmove:
609 return copy, {}, diverge, renamedelete, {}
609 return copy, {}, diverge, renamedelete, {}
610
610
611 for d in dirmove:
611 for d in dirmove:
612 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
612 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
613 (d, dirmove[d]))
613 (d, dirmove[d]))
614
614
615 movewithdir = {}
615 movewithdir = {}
616 # check unaccounted nonoverlapping files against directory moves
616 # check unaccounted nonoverlapping files against directory moves
617 for f in u1r + u2r:
617 for f in u1r + u2r:
618 if f not in fullcopy:
618 if f not in fullcopy:
619 for d in dirmove:
619 for d in dirmove:
620 if f.startswith(d):
620 if f.startswith(d):
621 # new file added in a directory that was moved, move it
621 # new file added in a directory that was moved, move it
622 df = dirmove[d] + f[len(d):]
622 df = dirmove[d] + f[len(d):]
623 if df not in copy:
623 if df not in copy:
624 movewithdir[f] = df
624 movewithdir[f] = df
625 repo.ui.debug((" pending file src: '%s' -> "
625 repo.ui.debug((" pending file src: '%s' -> "
626 "dst: '%s'\n") % (f, df))
626 "dst: '%s'\n") % (f, df))
627 break
627 break
628
628
629 return copy, movewithdir, diverge, renamedelete, dirmove
629 return copy, movewithdir, diverge, renamedelete, dirmove
630
630
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same 5-tuple as mergecopies: (copies, {}, {}, {}, {}) --
    only plain copies are detected, never directory moves or divergences.
    """

    # working contexts have no usable rev; operate on their first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched on the linear base..c2 "source branch"
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only copies whose source still exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    # Use a list comprehension instead of filter(): on Python 3 filter()
    # returns a lazy single-use iterator, which would make the truthiness
    # test below always succeed and be exhausted after one iteration.
    missingfiles = [f for f in changedfiles
                    if f not in m1 and f in base and f in c2]

    if missingfiles:
        # index files added on the c1 side by basename and by dirname so
        # rename candidates can be found with two dict lookups per file
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # in case of a rebase/graft, base may not be a common ancestor
        anc = c1.ancestor(c2)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2, anc.rev()):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
734
734
735 def _related(f1, f2, limit):
735 def _related(f1, f2, limit):
736 """return True if f1 and f2 filectx have a common ancestor
736 """return True if f1 and f2 filectx have a common ancestor
737
737
738 Walk back to common ancestor to see if the two files originate
738 Walk back to common ancestor to see if the two files originate
739 from the same file. Since workingfilectx's rev() is None it messes
739 from the same file. Since workingfilectx's rev() is None it messes
740 up the integer comparison logic, hence the pre-step check for
740 up the integer comparison logic, hence the pre-step check for
741 None (f1 and f2 can only be workingfilectx's initially).
741 None (f1 and f2 can only be workingfilectx's initially).
742 """
742 """
743
743
744 if f1 == f2:
744 if f1 == f2:
745 return f1 # a match
745 return f1 # a match
746
746
747 g1, g2 = f1.ancestors(), f2.ancestors()
747 g1, g2 = f1.ancestors(), f2.ancestors()
748 try:
748 try:
749 f1r, f2r = f1.linkrev(), f2.linkrev()
749 f1r, f2r = f1.linkrev(), f2.linkrev()
750
750
751 if f1r is None:
751 if f1r is None:
752 f1 = next(g1)
752 f1 = next(g1)
753 if f2r is None:
753 if f2r is None:
754 f2 = next(g2)
754 f2 = next(g2)
755
755
756 while True:
756 while True:
757 f1r, f2r = f1.linkrev(), f2.linkrev()
757 f1r, f2r = f1.linkrev(), f2.linkrev()
758 if f1r > f2r:
758 if f1r > f2r:
759 f1 = next(g1)
759 f1 = next(g1)
760 elif f2r > f1r:
760 elif f2r > f1r:
761 f2 = next(g2)
761 f2 = next(g2)
762 elif f1 == f2:
762 elif f1 == f2:
763 return f1 # a match
763 return f1 # a match
764 elif f1r == f2r or f1r < limit or f2r < limit:
764 elif f1r == f2r or f1r < limit or f2r < limit:
765 return False # copy no longer relevant
765 return False # copy no longer relevant
766 except StopIteration:
766 except StopIteration:
767 return False
767 return False
768
768
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    # 'seen' records every path name f has carried along its ancestry, so
    # rename cycles are detected and each path is processed only once
    seen = {f}
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        ocr = oc.linkrev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2, tca.rev())
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                # record the divergence against the first seen name that
                # exists in the merge base
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

    # ancestry exhausted without an answer: if the walk reached a file
    # known to the topological CA, record it as a partial (incomplete)
    # copy or divergence for mergecopies to combine with the other side
    if of in mta:
        if backwards or remotebase:
            data['incomplete'][of] = f
        else:
            for sf in seen:
                if sf in mb:
                    if tca == base:
                        data['diverge'].setdefault(sf, []).append(f)
                    else:
                        data['incompletediverge'][sf] = [of, f]
                    return
862
862
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    '''reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    '''
    exclude = {}
    if skiprev is not None:
        # Building the exclusion set is O(size of the repo) during a
        # rebase, so it honors copytrace='off'; the rest of the function
        # stays active because it is much faster and is required for
        # carrying copy metadata across the rebase anyway.
        if repo.ui.config('experimental', 'copytrace') != 'off':
            exclude = pathcopies(repo[fromrev], repo[skiprev])
    allcopies = pathcopies(repo[fromrev], repo[rev])
    for dst, src in allcopies.iteritems():
        if dst not in exclude:
            # pathcopies returns backward renames, so dst might not
            # actually be in the dirstate
            wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now