##// END OF EJS Templates
copies: extract method for getting non-wdir forward copies...
Martin von Zweigbergk -
r35423:8801cdce default
parent child Browse files
Show More
@@ -1,884 +1,889 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 scmutil,
20 scmutil,
21 util,
21 util,
22 )
22 )
23
23
24 def _findlimit(repo, a, b):
24 def _findlimit(repo, a, b):
25 """
25 """
26 Find the last revision that needs to be checked to ensure that a full
26 Find the last revision that needs to be checked to ensure that a full
27 transitive closure for file copies can be properly calculated.
27 transitive closure for file copies can be properly calculated.
28 Generally, this means finding the earliest revision number that's an
28 Generally, this means finding the earliest revision number that's an
29 ancestor of a or b but not both, except when a or b is a direct descendent
29 ancestor of a or b but not both, except when a or b is a direct descendent
30 of the other, in which case we can return the minimum revnum of a and b.
30 of the other, in which case we can return the minimum revnum of a and b.
31 None if no such revision exists.
31 None if no such revision exists.
32 """
32 """
33
33
34 # basic idea:
34 # basic idea:
35 # - mark a and b with different sides
35 # - mark a and b with different sides
36 # - if a parent's children are all on the same side, the parent is
36 # - if a parent's children are all on the same side, the parent is
37 # on that side, otherwise it is on no side
37 # on that side, otherwise it is on no side
38 # - walk the graph in topological order with the help of a heap;
38 # - walk the graph in topological order with the help of a heap;
39 # - add unseen parents to side map
39 # - add unseen parents to side map
40 # - clear side of any parent that has children on different sides
40 # - clear side of any parent that has children on different sides
41 # - track number of interesting revs that might still be on a side
41 # - track number of interesting revs that might still be on a side
42 # - track the lowest interesting rev seen
42 # - track the lowest interesting rev seen
43 # - quit when interesting revs is zero
43 # - quit when interesting revs is zero
44
44
45 cl = repo.changelog
45 cl = repo.changelog
46 working = len(cl) # pseudo rev for the working directory
46 working = len(cl) # pseudo rev for the working directory
47 if a is None:
47 if a is None:
48 a = working
48 a = working
49 if b is None:
49 if b is None:
50 b = working
50 b = working
51
51
52 side = {a: -1, b: 1}
52 side = {a: -1, b: 1}
53 visit = [-a, -b]
53 visit = [-a, -b]
54 heapq.heapify(visit)
54 heapq.heapify(visit)
55 interesting = len(visit)
55 interesting = len(visit)
56 hascommonancestor = False
56 hascommonancestor = False
57 limit = working
57 limit = working
58
58
59 while interesting:
59 while interesting:
60 r = -heapq.heappop(visit)
60 r = -heapq.heappop(visit)
61 if r == working:
61 if r == working:
62 parents = [cl.rev(p) for p in repo.dirstate.parents()]
62 parents = [cl.rev(p) for p in repo.dirstate.parents()]
63 else:
63 else:
64 parents = cl.parentrevs(r)
64 parents = cl.parentrevs(r)
65 for p in parents:
65 for p in parents:
66 if p < 0:
66 if p < 0:
67 continue
67 continue
68 if p not in side:
68 if p not in side:
69 # first time we see p; add it to visit
69 # first time we see p; add it to visit
70 side[p] = side[r]
70 side[p] = side[r]
71 if side[p]:
71 if side[p]:
72 interesting += 1
72 interesting += 1
73 heapq.heappush(visit, -p)
73 heapq.heappush(visit, -p)
74 elif side[p] and side[p] != side[r]:
74 elif side[p] and side[p] != side[r]:
75 # p was interesting but now we know better
75 # p was interesting but now we know better
76 side[p] = 0
76 side[p] = 0
77 interesting -= 1
77 interesting -= 1
78 hascommonancestor = True
78 hascommonancestor = True
79 if side[r]:
79 if side[r]:
80 limit = r # lowest rev visited
80 limit = r # lowest rev visited
81 interesting -= 1
81 interesting -= 1
82
82
83 if not hascommonancestor:
83 if not hascommonancestor:
84 return None
84 return None
85
85
86 # Consider the following flow (see test-commit-amend.t under issue4405):
86 # Consider the following flow (see test-commit-amend.t under issue4405):
87 # 1/ File 'a0' committed
87 # 1/ File 'a0' committed
88 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
88 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
89 # 3/ Move back to first commit
89 # 3/ Move back to first commit
90 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
90 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
91 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
91 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
92 #
92 #
93 # During the amend in step five, we will be in this state:
93 # During the amend in step five, we will be in this state:
94 #
94 #
95 # @ 3 temporary amend commit for a1-amend
95 # @ 3 temporary amend commit for a1-amend
96 # |
96 # |
97 # o 2 a1-amend
97 # o 2 a1-amend
98 # |
98 # |
99 # | o 1 a1
99 # | o 1 a1
100 # |/
100 # |/
101 # o 0 a0
101 # o 0 a0
102 #
102 #
103 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
103 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
104 # yet the filelog has the copy information in rev 1 and we will not look
104 # yet the filelog has the copy information in rev 1 and we will not look
105 # back far enough unless we also look at the a and b as candidates.
105 # back far enough unless we also look at the a and b as candidates.
106 # This only occurs when a is a descendent of b or visa-versa.
106 # This only occurs when a is a descendent of b or visa-versa.
107 return min(limit, a, b)
107 return min(limit, a, b)
108
108
109 def _chain(src, dst, a, b):
109 def _chain(src, dst, a, b):
110 """chain two sets of copies a->b"""
110 """chain two sets of copies a->b"""
111 t = a.copy()
111 t = a.copy()
112 for k, v in b.iteritems():
112 for k, v in b.iteritems():
113 if v in t:
113 if v in t:
114 # found a chain
114 # found a chain
115 if t[v] != k:
115 if t[v] != k:
116 # file wasn't renamed back to itself
116 # file wasn't renamed back to itself
117 t[k] = t[v]
117 t[k] = t[v]
118 if v not in dst:
118 if v not in dst:
119 # chain was a rename, not a copy
119 # chain was a rename, not a copy
120 del t[v]
120 del t[v]
121 if v in src:
121 if v in src:
122 # file is a copy of an existing file
122 # file is a copy of an existing file
123 t[k] = v
123 t[k] = v
124
124
125 # remove criss-crossed copies
125 # remove criss-crossed copies
126 for k, v in t.items():
126 for k, v in t.items():
127 if k in src and v in dst:
127 if k in src and v in dst:
128 del t[k]
128 del t[k]
129
129
130 return t
130 return t
131
131
132 def _tracefile(fctx, am, limit=-1):
132 def _tracefile(fctx, am, limit=-1):
133 """return file context that is the ancestor of fctx present in ancestor
133 """return file context that is the ancestor of fctx present in ancestor
134 manifest am, stopping after the first ancestor lower than limit"""
134 manifest am, stopping after the first ancestor lower than limit"""
135
135
136 for f in fctx.ancestors():
136 for f in fctx.ancestors():
137 if am.get(f.path(), None) == f.filenode():
137 if am.get(f.path(), None) == f.filenode():
138 return f
138 return f
139 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
139 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
140 return None
140 return None
141
141
142 def _dirstatecopies(d, match=None):
142 def _dirstatecopies(d, match=None):
143 ds = d._repo.dirstate
143 ds = d._repo.dirstate
144 c = ds.copies().copy()
144 c = ds.copies().copy()
145 for k in list(c):
145 for k in list(c):
146 if ds[k] not in 'anm' or (match and not match(k)):
146 if ds[k] not in 'anm' or (match and not match(k)):
147 del c[k]
147 del c[k]
148 return c
148 return c
149
149
150 def _computeforwardmissing(a, b, match=None):
150 def _computeforwardmissing(a, b, match=None):
151 """Computes which files are in b but not a.
151 """Computes which files are in b but not a.
152 This is its own function so extensions can easily wrap this call to see what
152 This is its own function so extensions can easily wrap this call to see what
153 files _forwardcopies is about to process.
153 files _forwardcopies is about to process.
154 """
154 """
155 ma = a.manifest()
155 ma = a.manifest()
156 mb = b.manifest()
156 mb = b.manifest()
157 return mb.filesnotin(ma, match=match)
157 return mb.filesnotin(ma, match=match)
158
158
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        limit = -1
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in missing:
        fctx = b[f]
        # share one ancestry context across all traced files to keep
        # linkrev adjustment cheap (see _makegetfctx for background)
        fctx._ancestrycontext = ancestrycontext
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            cm[f] = ofctx.path()
    return cm
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    # check for working copy
    w = None
    if b.rev() is None:
        w = b
        b = w.p1()
        if a == b:
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(w, match)

    cm = _committedforwardcopies(a, b, match)

    # combine copies from dirstate if necessary
    if w is not None:
        cm = _chain(a, w, cm, _dirstatecopies(w, match))

    return cm
209 def _backwardrenames(a, b):
214 def _backwardrenames(a, b):
210 if a._repo.ui.config('experimental', 'copytrace') == 'off':
215 if a._repo.ui.config('experimental', 'copytrace') == 'off':
211 return {}
216 return {}
212
217
213 # Even though we're not taking copies into account, 1:n rename situations
218 # Even though we're not taking copies into account, 1:n rename situations
214 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
219 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
215 # arbitrarily pick one of the renames.
220 # arbitrarily pick one of the renames.
216 f = _forwardcopies(b, a)
221 f = _forwardcopies(b, a)
217 r = {}
222 r = {}
218 for k, v in sorted(f.iteritems()):
223 for k, v in sorted(f.iteritems()):
219 # remove copies
224 # remove copies
220 if v in a:
225 if v in a:
221 continue
226 continue
222 r[v] = k
227 r[v] = k
223 return r
228 return r
224
229
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: pure forward direction
        return _forwardcopies(x, y, match=match)
    if a == y:
        # y is an ancestor of x: pure backward direction
        return _backwardrenames(x, y)
    # diverged: go back to the common ancestor, then forward to y
    return _chain(x, y, _backwardrenames(x, a),
                  _forwardcopies(a, y, match=match))
237 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
242 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
238 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
243 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
239 and c2. This is its own function so extensions can easily wrap this call
244 and c2. This is its own function so extensions can easily wrap this call
240 to see what files mergecopies is about to process.
245 to see what files mergecopies is about to process.
241
246
242 Even though c1 and c2 are not used in this function, they are useful in
247 Even though c1 and c2 are not used in this function, they are useful in
243 other extensions for being able to read the file nodes of the changed files.
248 other extensions for being able to read the file nodes of the changed files.
244
249
245 "baselabel" can be passed to help distinguish the multiple computations
250 "baselabel" can be passed to help distinguish the multiple computations
246 done in the graft case.
251 done in the graft case.
247 """
252 """
248 u1 = sorted(addedinm1 - addedinm2)
253 u1 = sorted(addedinm1 - addedinm2)
249 u2 = sorted(addedinm2 - addedinm1)
254 u2 = sorted(addedinm2 - addedinm1)
250
255
251 header = " unmatched files in %s"
256 header = " unmatched files in %s"
252 if baselabel:
257 if baselabel:
253 header += ' (from %s)' % baselabel
258 header += ' (from %s)' % baselabel
254 if u1:
259 if u1:
255 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
260 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
256 if u2:
261 if u2:
257 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
262 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
258 return u1, u2
263 return u1, u2
259
264
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        revs = [rev]
        if rev is None:
            # working context: use the parents' revs for ancestry
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if n in node.wdirnodes:  # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    return util.lrucachefunc(makectx)
295 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
300 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
296 """combine partial copy paths"""
301 """combine partial copy paths"""
297 remainder = {}
302 remainder = {}
298 for f in copyfrom:
303 for f in copyfrom:
299 if f in copyto:
304 if f in copyto:
300 finalcopy[copyto[f]] = copyfrom[f]
305 finalcopy[copyto[f]] = copyfrom[f]
301 del copyto[f]
306 del copyto[f]
302 for f in incompletediverge:
307 for f in incompletediverge:
303 assert f not in diverge
308 assert f not in diverge
304 ic = incompletediverge[f]
309 ic = incompletediverge[f]
305 if ic[0] in copyto:
310 if ic[0] in copyto:
306 diverge[f] = [copyto[ic[0]], ic[1]]
311 diverge[f] = [copyto[ic[0]], ic[1]]
307 else:
312 else:
308 remainder[f] = ic
313 remainder[f] = ic
309 return remainder
314 return remainder
310
315
def mergecopies(repo, c1, c2, base):
    """
    The function calling different copytracing algorithms on the basis of config
    which find moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'off':
        return {}, {}, {}, {}, {}
    elif copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
381 def _isfullcopytraceable(repo, c1, base):
386 def _isfullcopytraceable(repo, c1, base):
382 """ Checks that if base, source and destination are all no-public branches,
387 """ Checks that if base, source and destination are all no-public branches,
383 if yes let's use the full copytrace algorithm for increased capabilities
388 if yes let's use the full copytrace algorithm for increased capabilities
384 since it will be fast enough.
389 since it will be fast enough.
385
390
386 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
391 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
387 number of changesets from c1 to base such that if number of changesets are
392 number of changesets from c1 to base such that if number of changesets are
388 more than the limit, full copytracing algorithm won't be used.
393 more than the limit, full copytracing algorithm won't be used.
389 """
394 """
390 if c1.rev() is None:
395 if c1.rev() is None:
391 c1 = c1.p1()
396 c1 = c1.p1()
392 if c1.mutable() and base.mutable():
397 if c1.mutable() and base.mutable():
393 sourcecommitlimit = repo.ui.configint('experimental',
398 sourcecommitlimit = repo.ui.configint('experimental',
394 'copytrace.sourcecommitlimit')
399 'copytrace.sourcecommitlimit')
395 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
400 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
396 return commits < sourcecommitlimit
401 return commits < sourcecommitlimit
397 return False
402 return False
398
403
399 def _fullcopytracing(repo, c1, c2, base):
404 def _fullcopytracing(repo, c1, c2, base):
400 """ The full copytracing algorithm which finds all the new files that were
405 """ The full copytracing algorithm which finds all the new files that were
401 added from merge base up to the top commit and for each file it checks if
406 added from merge base up to the top commit and for each file it checks if
402 this file was copied from another file.
407 this file was copied from another file.
403
408
404 This is pretty slow when a lot of changesets are involved but will track all
409 This is pretty slow when a lot of changesets are involved but will track all
405 the copies.
410 the copies.
406 """
411 """
407 # In certain scenarios (e.g. graft, update or rebase), base can be
412 # In certain scenarios (e.g. graft, update or rebase), base can be
408 # overridden We still need to know a real common ancestor in this case We
413 # overridden We still need to know a real common ancestor in this case We
409 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
414 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
410 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
415 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
411 # caller may not know if the revision passed in lieu of the CA is a genuine
416 # caller may not know if the revision passed in lieu of the CA is a genuine
412 # common ancestor or not without explicitly checking it, it's better to
417 # common ancestor or not without explicitly checking it, it's better to
413 # determine that here.
418 # determine that here.
414 #
419 #
415 # base.descendant(wc) and base.descendant(base) are False, work around that
420 # base.descendant(wc) and base.descendant(base) are False, work around that
416 _c1 = c1.p1() if c1.rev() is None else c1
421 _c1 = c1.p1() if c1.rev() is None else c1
417 _c2 = c2.p1() if c2.rev() is None else c2
422 _c2 = c2.p1() if c2.rev() is None else c2
418 # an endpoint is "dirty" if it isn't a descendant of the merge base
423 # an endpoint is "dirty" if it isn't a descendant of the merge base
419 # if we have a dirty endpoint, we need to trigger graft logic, and also
424 # if we have a dirty endpoint, we need to trigger graft logic, and also
420 # keep track of which endpoint is dirty
425 # keep track of which endpoint is dirty
421 dirtyc1 = not (base == _c1 or base.descendant(_c1))
426 dirtyc1 = not (base == _c1 or base.descendant(_c1))
422 dirtyc2 = not (base == _c2 or base.descendant(_c2))
427 dirtyc2 = not (base == _c2 or base.descendant(_c2))
423 graft = dirtyc1 or dirtyc2
428 graft = dirtyc1 or dirtyc2
424 tca = base
429 tca = base
425 if graft:
430 if graft:
426 tca = _c1.ancestor(_c2)
431 tca = _c1.ancestor(_c2)
427
432
428 limit = _findlimit(repo, c1.rev(), c2.rev())
433 limit = _findlimit(repo, c1.rev(), c2.rev())
429 if limit is None:
434 if limit is None:
430 # no common ancestor, no copies
435 # no common ancestor, no copies
431 return {}, {}, {}, {}, {}
436 return {}, {}, {}, {}, {}
432 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
437 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
433
438
434 m1 = c1.manifest()
439 m1 = c1.manifest()
435 m2 = c2.manifest()
440 m2 = c2.manifest()
436 mb = base.manifest()
441 mb = base.manifest()
437
442
438 # gather data from _checkcopies:
443 # gather data from _checkcopies:
439 # - diverge = record all diverges in this dict
444 # - diverge = record all diverges in this dict
440 # - copy = record all non-divergent copies in this dict
445 # - copy = record all non-divergent copies in this dict
441 # - fullcopy = record all copies in this dict
446 # - fullcopy = record all copies in this dict
442 # - incomplete = record non-divergent partial copies here
447 # - incomplete = record non-divergent partial copies here
443 # - incompletediverge = record divergent partial copies here
448 # - incompletediverge = record divergent partial copies here
444 diverge = {} # divergence data is shared
449 diverge = {} # divergence data is shared
445 incompletediverge = {}
450 incompletediverge = {}
446 data1 = {'copy': {},
451 data1 = {'copy': {},
447 'fullcopy': {},
452 'fullcopy': {},
448 'incomplete': {},
453 'incomplete': {},
449 'diverge': diverge,
454 'diverge': diverge,
450 'incompletediverge': incompletediverge,
455 'incompletediverge': incompletediverge,
451 }
456 }
452 data2 = {'copy': {},
457 data2 = {'copy': {},
453 'fullcopy': {},
458 'fullcopy': {},
454 'incomplete': {},
459 'incomplete': {},
455 'diverge': diverge,
460 'diverge': diverge,
456 'incompletediverge': incompletediverge,
461 'incompletediverge': incompletediverge,
457 }
462 }
458
463
459 # find interesting file sets from manifests
464 # find interesting file sets from manifests
460 addedinm1 = m1.filesnotin(mb)
465 addedinm1 = m1.filesnotin(mb)
461 addedinm2 = m2.filesnotin(mb)
466 addedinm2 = m2.filesnotin(mb)
462 bothnew = sorted(addedinm1 & addedinm2)
467 bothnew = sorted(addedinm1 & addedinm2)
463 if tca == base:
468 if tca == base:
464 # unmatched file from base
469 # unmatched file from base
465 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
470 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
466 u1u, u2u = u1r, u2r
471 u1u, u2u = u1r, u2r
467 else:
472 else:
468 # unmatched file from base (DAG rotation in the graft case)
473 # unmatched file from base (DAG rotation in the graft case)
469 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
474 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
470 baselabel='base')
475 baselabel='base')
471 # unmatched file from topological common ancestors (no DAG rotation)
476 # unmatched file from topological common ancestors (no DAG rotation)
472 # need to recompute this for directory move handling when grafting
477 # need to recompute this for directory move handling when grafting
473 mta = tca.manifest()
478 mta = tca.manifest()
474 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
479 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
475 m2.filesnotin(mta),
480 m2.filesnotin(mta),
476 baselabel='topological common ancestor')
481 baselabel='topological common ancestor')
477
482
478 for f in u1u:
483 for f in u1u:
479 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
484 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
480
485
481 for f in u2u:
486 for f in u2u:
482 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
487 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
483
488
484 copy = dict(data1['copy'])
489 copy = dict(data1['copy'])
485 copy.update(data2['copy'])
490 copy.update(data2['copy'])
486 fullcopy = dict(data1['fullcopy'])
491 fullcopy = dict(data1['fullcopy'])
487 fullcopy.update(data2['fullcopy'])
492 fullcopy.update(data2['fullcopy'])
488
493
489 if dirtyc1:
494 if dirtyc1:
490 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
495 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
491 incompletediverge)
496 incompletediverge)
492 else:
497 else:
493 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
498 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
494 incompletediverge)
499 incompletediverge)
495
500
496 renamedelete = {}
501 renamedelete = {}
497 renamedeleteset = set()
502 renamedeleteset = set()
498 divergeset = set()
503 divergeset = set()
499 for of, fl in list(diverge.items()):
504 for of, fl in list(diverge.items()):
500 if len(fl) == 1 or of in c1 or of in c2:
505 if len(fl) == 1 or of in c1 or of in c2:
501 del diverge[of] # not actually divergent, or not a rename
506 del diverge[of] # not actually divergent, or not a rename
502 if of not in c1 and of not in c2:
507 if of not in c1 and of not in c2:
503 # renamed on one side, deleted on the other side, but filter
508 # renamed on one side, deleted on the other side, but filter
504 # out files that have been renamed and then deleted
509 # out files that have been renamed and then deleted
505 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
510 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
506 renamedeleteset.update(fl) # reverse map for below
511 renamedeleteset.update(fl) # reverse map for below
507 else:
512 else:
508 divergeset.update(fl) # reverse map for below
513 divergeset.update(fl) # reverse map for below
509
514
510 if bothnew:
515 if bothnew:
511 repo.ui.debug(" unmatched files new in both:\n %s\n"
516 repo.ui.debug(" unmatched files new in both:\n %s\n"
512 % "\n ".join(bothnew))
517 % "\n ".join(bothnew))
513 bothdiverge = {}
518 bothdiverge = {}
514 bothincompletediverge = {}
519 bothincompletediverge = {}
515 remainder = {}
520 remainder = {}
516 both1 = {'copy': {},
521 both1 = {'copy': {},
517 'fullcopy': {},
522 'fullcopy': {},
518 'incomplete': {},
523 'incomplete': {},
519 'diverge': bothdiverge,
524 'diverge': bothdiverge,
520 'incompletediverge': bothincompletediverge
525 'incompletediverge': bothincompletediverge
521 }
526 }
522 both2 = {'copy': {},
527 both2 = {'copy': {},
523 'fullcopy': {},
528 'fullcopy': {},
524 'incomplete': {},
529 'incomplete': {},
525 'diverge': bothdiverge,
530 'diverge': bothdiverge,
526 'incompletediverge': bothincompletediverge
531 'incompletediverge': bothincompletediverge
527 }
532 }
528 for f in bothnew:
533 for f in bothnew:
529 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
534 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
530 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
535 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
531 if dirtyc1:
536 if dirtyc1:
532 # incomplete copies may only be found on the "dirty" side for bothnew
537 # incomplete copies may only be found on the "dirty" side for bothnew
533 assert not both2['incomplete']
538 assert not both2['incomplete']
534 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
539 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
535 bothincompletediverge)
540 bothincompletediverge)
536 elif dirtyc2:
541 elif dirtyc2:
537 assert not both1['incomplete']
542 assert not both1['incomplete']
538 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
543 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
539 bothincompletediverge)
544 bothincompletediverge)
540 else:
545 else:
541 # incomplete copies and divergences can't happen outside grafts
546 # incomplete copies and divergences can't happen outside grafts
542 assert not both1['incomplete']
547 assert not both1['incomplete']
543 assert not both2['incomplete']
548 assert not both2['incomplete']
544 assert not bothincompletediverge
549 assert not bothincompletediverge
545 for f in remainder:
550 for f in remainder:
546 assert f not in bothdiverge
551 assert f not in bothdiverge
547 ic = remainder[f]
552 ic = remainder[f]
548 if ic[0] in (m1 if dirtyc1 else m2):
553 if ic[0] in (m1 if dirtyc1 else m2):
549 # backed-out rename on one side, but watch out for deleted files
554 # backed-out rename on one side, but watch out for deleted files
550 bothdiverge[f] = ic
555 bothdiverge[f] = ic
551 for of, fl in bothdiverge.items():
556 for of, fl in bothdiverge.items():
552 if len(fl) == 2 and fl[0] == fl[1]:
557 if len(fl) == 2 and fl[0] == fl[1]:
553 copy[fl[0]] = of # not actually divergent, just matching renames
558 copy[fl[0]] = of # not actually divergent, just matching renames
554
559
555 if fullcopy and repo.ui.debugflag:
560 if fullcopy and repo.ui.debugflag:
556 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
561 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
557 "% = renamed and deleted):\n")
562 "% = renamed and deleted):\n")
558 for f in sorted(fullcopy):
563 for f in sorted(fullcopy):
559 note = ""
564 note = ""
560 if f in copy:
565 if f in copy:
561 note += "*"
566 note += "*"
562 if f in divergeset:
567 if f in divergeset:
563 note += "!"
568 note += "!"
564 if f in renamedeleteset:
569 if f in renamedeleteset:
565 note += "%"
570 note += "%"
566 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
571 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
567 note))
572 note))
568 del divergeset
573 del divergeset
569
574
570 if not fullcopy:
575 if not fullcopy:
571 return copy, {}, diverge, renamedelete, {}
576 return copy, {}, diverge, renamedelete, {}
572
577
573 repo.ui.debug(" checking for directory renames\n")
578 repo.ui.debug(" checking for directory renames\n")
574
579
575 # generate a directory move map
580 # generate a directory move map
576 d1, d2 = c1.dirs(), c2.dirs()
581 d1, d2 = c1.dirs(), c2.dirs()
577 # Hack for adding '', which is not otherwise added, to d1 and d2
582 # Hack for adding '', which is not otherwise added, to d1 and d2
578 d1.addpath('/')
583 d1.addpath('/')
579 d2.addpath('/')
584 d2.addpath('/')
580 invalid = set()
585 invalid = set()
581 dirmove = {}
586 dirmove = {}
582
587
583 # examine each file copy for a potential directory move, which is
588 # examine each file copy for a potential directory move, which is
584 # when all the files in a directory are moved to a new directory
589 # when all the files in a directory are moved to a new directory
585 for dst, src in fullcopy.iteritems():
590 for dst, src in fullcopy.iteritems():
586 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
591 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
587 if dsrc in invalid:
592 if dsrc in invalid:
588 # already seen to be uninteresting
593 # already seen to be uninteresting
589 continue
594 continue
590 elif dsrc in d1 and ddst in d1:
595 elif dsrc in d1 and ddst in d1:
591 # directory wasn't entirely moved locally
596 # directory wasn't entirely moved locally
592 invalid.add(dsrc + "/")
597 invalid.add(dsrc + "/")
593 elif dsrc in d2 and ddst in d2:
598 elif dsrc in d2 and ddst in d2:
594 # directory wasn't entirely moved remotely
599 # directory wasn't entirely moved remotely
595 invalid.add(dsrc + "/")
600 invalid.add(dsrc + "/")
596 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
601 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
597 # files from the same directory moved to two different places
602 # files from the same directory moved to two different places
598 invalid.add(dsrc + "/")
603 invalid.add(dsrc + "/")
599 else:
604 else:
600 # looks good so far
605 # looks good so far
601 dirmove[dsrc + "/"] = ddst + "/"
606 dirmove[dsrc + "/"] = ddst + "/"
602
607
603 for i in invalid:
608 for i in invalid:
604 if i in dirmove:
609 if i in dirmove:
605 del dirmove[i]
610 del dirmove[i]
606 del d1, d2, invalid
611 del d1, d2, invalid
607
612
608 if not dirmove:
613 if not dirmove:
609 return copy, {}, diverge, renamedelete, {}
614 return copy, {}, diverge, renamedelete, {}
610
615
611 for d in dirmove:
616 for d in dirmove:
612 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
617 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
613 (d, dirmove[d]))
618 (d, dirmove[d]))
614
619
615 movewithdir = {}
620 movewithdir = {}
616 # check unaccounted nonoverlapping files against directory moves
621 # check unaccounted nonoverlapping files against directory moves
617 for f in u1r + u2r:
622 for f in u1r + u2r:
618 if f not in fullcopy:
623 if f not in fullcopy:
619 for d in dirmove:
624 for d in dirmove:
620 if f.startswith(d):
625 if f.startswith(d):
621 # new file added in a directory that was moved, move it
626 # new file added in a directory that was moved, move it
622 df = dirmove[d] + f[len(d):]
627 df = dirmove[d] + f[len(d):]
623 if df not in copy:
628 if df not in copy:
624 movewithdir[f] = df
629 movewithdir[f] = df
625 repo.ui.debug((" pending file src: '%s' -> "
630 repo.ui.debug((" pending file src: '%s' -> "
626 "dst: '%s'\n") % (f, df))
631 "dst: '%s'\n") % (f, df))
627 break
632 break
628
633
629 return copy, movewithdir, diverge, renamedelete, dirmove
634 return copy, movewithdir, diverge, renamedelete, dirmove
630
635
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # wdir has no rev; fall back to its first parent for revnum-based checks
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    # walk the linear chain c2 -> base, collecting every touched file;
    # any merge commit invalidates the heuristic's assumptions
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only the recorded copies whose source still exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = filter(lambda f: f not in m1 and f in base and f in c2,
                          changedfiles)

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        # index files added on the c1 side by basename and by dirname, the
        # two axes along which the heuristic looks for rename candidates
        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # in case of a rebase/graft, base may not be a common ancestor
        anc = c1.ancestor(c2)

        # we can have a lot of candidates which can slow down the heuristics;
        # config value to limit the number of candidate moves to check.
        # Loop-invariant: read it once instead of once per missing file.
        maxcandidates = repo.ui.configint('experimental',
                                          'copytrace.movecandidateslimit')

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2, anc.rev()):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
734
739
735 def _related(f1, f2, limit):
740 def _related(f1, f2, limit):
736 """return True if f1 and f2 filectx have a common ancestor
741 """return True if f1 and f2 filectx have a common ancestor
737
742
738 Walk back to common ancestor to see if the two files originate
743 Walk back to common ancestor to see if the two files originate
739 from the same file. Since workingfilectx's rev() is None it messes
744 from the same file. Since workingfilectx's rev() is None it messes
740 up the integer comparison logic, hence the pre-step check for
745 up the integer comparison logic, hence the pre-step check for
741 None (f1 and f2 can only be workingfilectx's initially).
746 None (f1 and f2 can only be workingfilectx's initially).
742 """
747 """
743
748
744 if f1 == f2:
749 if f1 == f2:
745 return f1 # a match
750 return f1 # a match
746
751
747 g1, g2 = f1.ancestors(), f2.ancestors()
752 g1, g2 = f1.ancestors(), f2.ancestors()
748 try:
753 try:
749 f1r, f2r = f1.linkrev(), f2.linkrev()
754 f1r, f2r = f1.linkrev(), f2.linkrev()
750
755
751 if f1r is None:
756 if f1r is None:
752 f1 = next(g1)
757 f1 = next(g1)
753 if f2r is None:
758 if f2r is None:
754 f2 = next(g2)
759 f2 = next(g2)
755
760
756 while True:
761 while True:
757 f1r, f2r = f1.linkrev(), f2.linkrev()
762 f1r, f2r = f1.linkrev(), f2.linkrev()
758 if f1r > f2r:
763 if f1r > f2r:
759 f1 = next(g1)
764 f1 = next(g1)
760 elif f2r > f1r:
765 elif f2r > f1r:
761 f2 = next(g2)
766 f2 = next(g2)
762 elif f1 == f2:
767 elif f1 == f2:
763 return f1 # a match
768 return f1 # a match
764 elif f1r == f2r or f1r < limit or f2r < limit:
769 elif f1r == f2r or f1r < limit or f2r < limit:
765 return False # copy no longer relevant
770 return False # copy no longer relevant
766 except StopIteration:
771 except StopIteration:
767 return False
772 return False
768
773
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    # filenames already visited along this rename chain; seeds with f so a
    # chain that loops back onto an already-seen name is cut off below
    seen = {f}
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        ocr = oc.linkrev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2, tca.rev())
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

    # the rename chain was exhausted without a complete match; if the last
    # name reached exists in the topological CA, record a partial result so
    # mergecopies() can combine the two half-chains later
    if of in mta:
        if backwards or remotebase:
            data['incomplete'][of] = f
        else:
            for sf in seen:
                if sf in mb:
                    if tca == base:
                        data['diverge'].setdefault(sf, []).append(f)
                    else:
                        data['incompletediverge'][sf] = [of, f]
                    return
862
867
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    skipped = {}
    if skiprev is not None:
        # copytrace='off' skips only this lookup, not the whole function:
        # computing it is O(size of the repo) during a rebase, while the
        # loop below is much faster (and is still required to carry copy
        # metadata across the rebase anyway).
        if repo.ui.config('experimental', 'copytrace') != 'off':
            skipped = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst not in skipped:
            wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now