copies: correctly skip directories that have already been considered...
Kyle Lippincott
r39299:eebd5918 default
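
In _fullcopytracing()'s directory-move detection (the hunk below), the old code recorded directories in `invalid` and `dirmove` with a trailing slash (`dsrc + "/"`), while the guard at the top of the loop tested the bare name (`if dsrc in invalid`). That membership test could never succeed, so a directory that had already been ruled out was re-examined for every remaining file copy; the final `for i in invalid` cleanup still produced correct results, but the intended skip never fired. The fix keeps bare directory names throughout the loop and appends the slash once, in a single comprehension after the loop (the slash is still needed afterwards so that `dirmove` keys act as path prefixes). A minimal standalone sketch of the mismatch, in modern Python rather than the file's py2 idioms:

# Minimal sketch of the mismatch fixed below (not part of the commit).
# Keys were stored with a trailing slash but looked up without one,
# so the "already seen" check could never succeed.
invalid = set()
invalid.add("dir" + "/")        # old code stored "dir/"
print("dir" in invalid)         # False -- the skip branch never fired
print("dir/" in invalid)        # True -- what the old code actually stored

# The fix: track bare names while scanning, add the slash once at the end.
invalid = {"dir"}
print("dir" in invalid)         # True -- already-considered dirs are skipped
dirmove = {"src": "dst"}
dirmove = {k + "/": v + "/" for k, v in dirmove.items()}  # as in the new code
print(dirmove)                  # {'src/': 'dst/'}
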
@@ -1,883 +1,885 @@
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import heapq
import os

from .i18n import _

from . import (
    match as matchmod,
    node,
    pathutil,
    scmutil,
    util,
)

def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendant
    of the other, in which case we can return the minimum revnum of a and b.
    None if no such revision exists.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    working = len(cl) # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    side = {a: -1, b: 1}
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @ 3 temporary amend commit for a1-amend
    # |
    # o 2 a1-amend
    # |
    # | o 1 a1
    # |/
    # o 0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendant of b or vice-versa.
    return min(limit, a, b)

def _chain(src, dst, a, b):
    """chain two sets of copies a->b"""
    t = a.copy()
    for k, v in b.iteritems():
        if v in t:
            # found a chain
            if t[v] != k:
                # file wasn't renamed back to itself
                t[k] = t[v]
            if v not in dst:
                # chain was a rename, not a copy
                del t[v]
        if v in src:
            # file is a copy of an existing file
            t[k] = v

    # remove criss-crossed copies
    for k, v in list(t.items()):
        if k in src and v in dst:
            del t[k]

    return t

def _tracefile(fctx, am, limit=-1):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am, stopping after the first ancestor lower than limit"""

    for f in fctx.ancestors():
        if am.get(f.path(), None) == f.filenode():
            return f
        if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
            return None

def _dirstatecopies(d, match=None):
    ds = d._repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in 'anm' or (match and not match(k)):
            del c[k]
    return c

def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)

def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        limit = -1
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in missing:
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            cm[f] = ofctx.path()
    return cm

def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    # check for working copy
    if b.rev() is None:
        if a == b.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(b, match)

        cm = _committedforwardcopies(a, b.p1(), match)
        # combine copies from dirstate if necessary
        return _chain(a, b, cm, _dirstatecopies(b, match))
    return _committedforwardcopies(a, b, match)

def _backwardrenames(a, b):
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(f.iteritems()):
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r

def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        return _forwardcopies(x, y, match=match)
    if a == y:
        return _backwardrenames(x, y)
    return _chain(x, y, _backwardrenames(x, a),
                  _forwardcopies(a, y, match=match))

def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
    """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
    and c2. This is its own function so extensions can easily wrap this call
    to see what files mergecopies is about to process.

    Even though c1 and c2 are not used in this function, they are useful in
    other extensions for being able to read the file nodes of the changed files.

    "baselabel" can be passed to help distinguish the multiple computations
    done in the graft case.
    """
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = "  unmatched files in %s"
    if baselabel:
        header += ' (from %s)' % baselabel
    if u1:
        repo.ui.debug("%s:\n   %s\n" % (header % 'local', "\n   ".join(u1)))
    if u2:
        repo.ui.debug("%s:\n   %s\n" % (header % 'other', "\n   ".join(u2)))

    narrowmatch = repo.narrowmatch()
    if not narrowmatch.always():
        u1 = [f for f in u1 if narrowmatch(f)]
        u2 = [f for f in u2 if narrowmatch(f)]
    return u1, u2

def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bugs in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        revs = [rev]
        if rev is None:
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if n in node.wdirfilenodeids: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not created from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    return util.lrucachefunc(makectx)

def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
    """combine partial copy paths"""
    remainder = {}
    for f in copyfrom:
        if f in copyto:
            finalcopy[copyto[f]] = copyfrom[f]
            del copyto[f]
    for f in incompletediverge:
        assert f not in diverge
        ic = incompletediverge[f]
        if ic[0] in copyto:
            diverge[f] = [copyto[ic[0]], ic[1]]
        else:
            remainder[f] = ic
    return remainder

def mergecopies(repo, c1, c2, base):
    """
    Calls different copytracing algorithms, based on config, to find moves
    and copies between contexts c1 and c2 that are relevant for merging.
    'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc. to
    merge files that were moved/copied in one merge parent and modified in
    another. For example:

    o        ---> 4 another commit
    |
    |   o   ---> 3 commit that modifies a.txt
    |  /
    o /   ---> 2 commit that moves a.txt to b.txt
    |/
    o        ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'off':
        return {}, {}, {}, {}, {}
    elif copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)

def _isfullcopytraceable(repo, c1, base):
    """ Checks whether base, source and destination are all non-public
    branches; if yes, use the full copytrace algorithm for increased
    capabilities, since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    the number of changesets from c1 to base; if the number of changesets is
    more than the limit, the full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint('experimental',
                                              'copytrace.sourcecommitlimit')
        commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False

def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track
    all the copies.
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden. We still need to know a real common ancestor in this case.
    # We can't just compute _c1.ancestor(_c2) and compare it to ca, because
    # there can be multiple common ancestors, e.g. in case of bidmerge.
    # Because our caller may not know if the revision passed in lieu of the CA
    # is a genuine common ancestor or not without explicitly checking it, it's
    # better to determine that here.
    #
    # base.isancestorof(wc) is False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not base.isancestorof(_c1)
    dirtyc2 = not base.isancestorof(_c2)
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}, {}
    repo.ui.debug("  searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb)
    addedinm2 = m2.filesnotin(mb)
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
                                      m2.filesnotin(mta),
                                      baselabel='topological common ancestor')

    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    else:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug("  unmatched files new in both:\n   %s\n"
                      % "\n   ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                              note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug("  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
-           invalid.add(dsrc + "/")
+           invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
-           invalid.add(dsrc + "/")
+           invalid.add(dsrc)
-       elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
+       elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
-           invalid.add(dsrc + "/")
+           invalid.add(dsrc)
        else:
            # looks good so far
-           dirmove[dsrc + "/"] = ddst + "/"
+           dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

+   dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
+
    for d in dirmove:
        repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(("   pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove

def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of the following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If a merge is involved it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very
    large in number and that will make the algorithm slow. The number of
    possible candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the
            # heuristics; config value to limit the number of candidate moves
            # to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}

def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return f1 # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else: # f1 and f2 point to files in the same linkrev
                return f1 == f2 # true if they point to the same file
    except StopIteration:
        return False

def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames.
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there are both backward and forward renames (before and
    # after the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    seen = {f}
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        ocr = oc.linkrev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file added on the destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

        if of in mta:
            if backwards or remotebase:
                data['incomplete'][of] = f
            else:
                for sf in seen:
                    if sf in mb:
                        if tca == base:
                            data['diverge'].setdefault(sf, []).append(f)
                        else:
                            data['incompletediverge'][sf] = [of, f]
                        return

def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    if (skiprev is not None and
        repo.ui.config('experimental', 'copytrace') != 'off'):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst in exclude:
            continue
        wctx[dst].markcopied(src)
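
A side note on _chain(), which pathcopies() above uses to compose copy maps across a common ancestor: it only needs membership tests on `src`/`dst` and dict operations on `a`/`b`, so its behavior can be sanity-checked with plain sets and dicts standing in for contexts and copy maps. A minimal sketch (Python 3 `items()` in place of the file's py2 `iteritems()`):

def chain(src, dst, a, b):
    """Standalone port of _chain(): src/dst are sets of files present at
    the endpoints, a and b are {dst: src} copy maps to be composed."""
    t = a.copy()
    for k, v in b.items():
        if v in t:
            # found a chain
            if t[v] != k:
                t[k] = t[v]
            if v not in dst:
                # chain was a rename, not a copy
                del t[v]
        if v in src:
            t[k] = v
    # remove criss-crossed copies
    for k, v in list(t.items()):
        if k in src and v in dst:
            del t[k]
    return t

# x..m renames a -> b, then m..y renames b -> c; chaining yields c -> a
print(chain({'a'}, {'c'}, {'b': 'a'}, {'c': 'b'}))   # {'c': 'a'}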
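
The trailing slashes restored by the new comprehension matter in the `movewithdir` pass of _fullcopytracing(): `dirmove` keys are matched against file paths with `startswith()`, and the slash keeps a directory such as `docs` from capturing files in a sibling like `docs2`. A small sketch of that rewrite step (hypothetical paths):

dirmove = {"docs/": "manual/"}            # shape produced by the new code
for f in ["docs/intro.txt", "docs2/new.txt"]:
    for d in dirmove:
        if f.startswith(d):
            # new file added in a directory that was moved, move it
            print(f, "->", dirmove[d] + f[len(d):])
            break
# only docs/intro.txt is rewritten (to manual/intro.txt); without the
# slash, the "docs" key would also match "docs2/new.txt"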
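
Relatedly, the candidate-gathering step of _heuristicscopytracing() pairs each missing file with the files added on the other side that share its basename or its dirname; each candidate is then vetted with _related() before being recorded as a copy. A sketch with hypothetical file names:

import collections
import os.path

basenametofilename = collections.defaultdict(list)
dirnametofilename = collections.defaultdict(list)
# files added on the local side (hypothetical names)
for f in ["lib/util.py", "lib2/helpers.py"]:
    basenametofilename[os.path.basename(f)].append(f)
    dirnametofilename[os.path.dirname(f)].append(f)

missing = "lib/helpers.py"   # present in base and c2, gone from c1
candidates = (basenametofilename[os.path.basename(missing)] +
              dirnametofilename[os.path.dirname(missing)])
print(candidates)            # ['lib2/helpers.py', 'lib/util.py']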