# Reconstructed from Mercurial changeset r34365:d00910b2 (branch: default)
# "copytrace: use ctx.mutable() instead of adhoc constant of non-public phases"
# Author: Yuya Nishihara
# Diff context: @@ -1,867 +1,863
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
7
7
from __future__ import absolute_import

import collections
import heapq
import os

from . import (
    match as matchmod,
    node,
    pathutil,
    phases,
    scmutil,
    util,
)
22
21
23 def _findlimit(repo, a, b):
22 def _findlimit(repo, a, b):
24 """
23 """
25 Find the last revision that needs to be checked to ensure that a full
24 Find the last revision that needs to be checked to ensure that a full
26 transitive closure for file copies can be properly calculated.
25 transitive closure for file copies can be properly calculated.
27 Generally, this means finding the earliest revision number that's an
26 Generally, this means finding the earliest revision number that's an
28 ancestor of a or b but not both, except when a or b is a direct descendent
27 ancestor of a or b but not both, except when a or b is a direct descendent
29 of the other, in which case we can return the minimum revnum of a and b.
28 of the other, in which case we can return the minimum revnum of a and b.
30 None if no such revision exists.
29 None if no such revision exists.
31 """
30 """
32
31
33 # basic idea:
32 # basic idea:
34 # - mark a and b with different sides
33 # - mark a and b with different sides
35 # - if a parent's children are all on the same side, the parent is
34 # - if a parent's children are all on the same side, the parent is
36 # on that side, otherwise it is on no side
35 # on that side, otherwise it is on no side
37 # - walk the graph in topological order with the help of a heap;
36 # - walk the graph in topological order with the help of a heap;
38 # - add unseen parents to side map
37 # - add unseen parents to side map
39 # - clear side of any parent that has children on different sides
38 # - clear side of any parent that has children on different sides
40 # - track number of interesting revs that might still be on a side
39 # - track number of interesting revs that might still be on a side
41 # - track the lowest interesting rev seen
40 # - track the lowest interesting rev seen
42 # - quit when interesting revs is zero
41 # - quit when interesting revs is zero
43
42
44 cl = repo.changelog
43 cl = repo.changelog
45 working = len(cl) # pseudo rev for the working directory
44 working = len(cl) # pseudo rev for the working directory
46 if a is None:
45 if a is None:
47 a = working
46 a = working
48 if b is None:
47 if b is None:
49 b = working
48 b = working
50
49
51 side = {a: -1, b: 1}
50 side = {a: -1, b: 1}
52 visit = [-a, -b]
51 visit = [-a, -b]
53 heapq.heapify(visit)
52 heapq.heapify(visit)
54 interesting = len(visit)
53 interesting = len(visit)
55 hascommonancestor = False
54 hascommonancestor = False
56 limit = working
55 limit = working
57
56
58 while interesting:
57 while interesting:
59 r = -heapq.heappop(visit)
58 r = -heapq.heappop(visit)
60 if r == working:
59 if r == working:
61 parents = [cl.rev(p) for p in repo.dirstate.parents()]
60 parents = [cl.rev(p) for p in repo.dirstate.parents()]
62 else:
61 else:
63 parents = cl.parentrevs(r)
62 parents = cl.parentrevs(r)
64 for p in parents:
63 for p in parents:
65 if p < 0:
64 if p < 0:
66 continue
65 continue
67 if p not in side:
66 if p not in side:
68 # first time we see p; add it to visit
67 # first time we see p; add it to visit
69 side[p] = side[r]
68 side[p] = side[r]
70 if side[p]:
69 if side[p]:
71 interesting += 1
70 interesting += 1
72 heapq.heappush(visit, -p)
71 heapq.heappush(visit, -p)
73 elif side[p] and side[p] != side[r]:
72 elif side[p] and side[p] != side[r]:
74 # p was interesting but now we know better
73 # p was interesting but now we know better
75 side[p] = 0
74 side[p] = 0
76 interesting -= 1
75 interesting -= 1
77 hascommonancestor = True
76 hascommonancestor = True
78 if side[r]:
77 if side[r]:
79 limit = r # lowest rev visited
78 limit = r # lowest rev visited
80 interesting -= 1
79 interesting -= 1
81
80
82 if not hascommonancestor:
81 if not hascommonancestor:
83 return None
82 return None
84
83
85 # Consider the following flow (see test-commit-amend.t under issue4405):
84 # Consider the following flow (see test-commit-amend.t under issue4405):
86 # 1/ File 'a0' committed
85 # 1/ File 'a0' committed
87 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
86 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
88 # 3/ Move back to first commit
87 # 3/ Move back to first commit
89 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
88 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
90 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
89 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
91 #
90 #
92 # During the amend in step five, we will be in this state:
91 # During the amend in step five, we will be in this state:
93 #
92 #
94 # @ 3 temporary amend commit for a1-amend
93 # @ 3 temporary amend commit for a1-amend
95 # |
94 # |
96 # o 2 a1-amend
95 # o 2 a1-amend
97 # |
96 # |
98 # | o 1 a1
97 # | o 1 a1
99 # |/
98 # |/
100 # o 0 a0
99 # o 0 a0
101 #
100 #
102 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
101 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
103 # yet the filelog has the copy information in rev 1 and we will not look
102 # yet the filelog has the copy information in rev 1 and we will not look
104 # back far enough unless we also look at the a and b as candidates.
103 # back far enough unless we also look at the a and b as candidates.
105 # This only occurs when a is a descendent of b or visa-versa.
104 # This only occurs when a is a descendent of b or visa-versa.
106 return min(limit, a, b)
105 return min(limit, a, b)
107
106
108 def _chain(src, dst, a, b):
107 def _chain(src, dst, a, b):
109 '''chain two sets of copies a->b'''
108 '''chain two sets of copies a->b'''
110 t = a.copy()
109 t = a.copy()
111 for k, v in b.iteritems():
110 for k, v in b.iteritems():
112 if v in t:
111 if v in t:
113 # found a chain
112 # found a chain
114 if t[v] != k:
113 if t[v] != k:
115 # file wasn't renamed back to itself
114 # file wasn't renamed back to itself
116 t[k] = t[v]
115 t[k] = t[v]
117 if v not in dst:
116 if v not in dst:
118 # chain was a rename, not a copy
117 # chain was a rename, not a copy
119 del t[v]
118 del t[v]
120 if v in src:
119 if v in src:
121 # file is a copy of an existing file
120 # file is a copy of an existing file
122 t[k] = v
121 t[k] = v
123
122
124 # remove criss-crossed copies
123 # remove criss-crossed copies
125 for k, v in t.items():
124 for k, v in t.items():
126 if k in src and v in dst:
125 if k in src and v in dst:
127 del t[k]
126 del t[k]
128
127
129 return t
128 return t
130
129
131 def _tracefile(fctx, am, limit=-1):
130 def _tracefile(fctx, am, limit=-1):
132 '''return file context that is the ancestor of fctx present in ancestor
131 '''return file context that is the ancestor of fctx present in ancestor
133 manifest am, stopping after the first ancestor lower than limit'''
132 manifest am, stopping after the first ancestor lower than limit'''
134
133
135 for f in fctx.ancestors():
134 for f in fctx.ancestors():
136 if am.get(f.path(), None) == f.filenode():
135 if am.get(f.path(), None) == f.filenode():
137 return f
136 return f
138 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
137 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
139 return None
138 return None
140
139
141 def _dirstatecopies(d):
140 def _dirstatecopies(d):
142 ds = d._repo.dirstate
141 ds = d._repo.dirstate
143 c = ds.copies().copy()
142 c = ds.copies().copy()
144 for k in list(c):
143 for k in list(c):
145 if ds[k] not in 'anm':
144 if ds[k] not in 'anm':
146 del c[k]
145 del c[k]
147 return c
146 return c
148
147
149 def _computeforwardmissing(a, b, match=None):
148 def _computeforwardmissing(a, b, match=None):
150 """Computes which files are in b but not a.
149 """Computes which files are in b but not a.
151 This is its own function so extensions can easily wrap this call to see what
150 This is its own function so extensions can easily wrap this call to see what
152 files _forwardcopies is about to process.
151 files _forwardcopies is about to process.
153 """
152 """
154 ma = a.manifest()
153 ma = a.manifest()
155 mb = b.manifest()
154 mb = b.manifest()
156 return mb.filesnotin(ma, match=match)
155 return mb.filesnotin(ma, match=match)
157
156
def _forwardcopies(a, b, match=None):
    '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

    # check for working copy
    w = None
    if b.rev() is None:
        w = b
        b = w.p1()
        if a == b:
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(w)

    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        limit = -1
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # share one ancestry context across all traced files to keep linkrev
    # adjustment cheap (see issue4537)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in missing:
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            cm[f] = ofctx.path()

    # combine copies from dirstate if necessary
    if w is not None:
        cm = _chain(a, w, cm, _dirstatecopies(w))

    return cm
207
206
def _backwardrenames(a, b):
    '''find {src@b: dst@a} rename mapping where b is a descendant of a,
    by inverting the forward copies and dropping plain copies'''
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(f.iteritems()):
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
223
222
def pathcopies(x, y, match=None):
    '''find {dst@y: src@x} copy mapping for directed compare'''
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        # y descends from x: straightforward forward trace
        return _forwardcopies(x, y, match=match)
    if a == y:
        # x descends from y: invert the renames
        return _backwardrenames(x, y)
    # unrelated endpoints: go back to the common ancestor, then forward
    return _chain(x, y, _backwardrenames(x, a),
                  _forwardcopies(a, y, match=match))
235
234
236 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
235 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
237 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
236 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
238 and c2. This is its own function so extensions can easily wrap this call
237 and c2. This is its own function so extensions can easily wrap this call
239 to see what files mergecopies is about to process.
238 to see what files mergecopies is about to process.
240
239
241 Even though c1 and c2 are not used in this function, they are useful in
240 Even though c1 and c2 are not used in this function, they are useful in
242 other extensions for being able to read the file nodes of the changed files.
241 other extensions for being able to read the file nodes of the changed files.
243
242
244 "baselabel" can be passed to help distinguish the multiple computations
243 "baselabel" can be passed to help distinguish the multiple computations
245 done in the graft case.
244 done in the graft case.
246 """
245 """
247 u1 = sorted(addedinm1 - addedinm2)
246 u1 = sorted(addedinm1 - addedinm2)
248 u2 = sorted(addedinm2 - addedinm1)
247 u2 = sorted(addedinm2 - addedinm1)
249
248
250 header = " unmatched files in %s"
249 header = " unmatched files in %s"
251 if baselabel:
250 if baselabel:
252 header += ' (from %s)' % baselabel
251 header += ' (from %s)' % baselabel
253 if u1:
252 if u1:
254 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
253 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
255 if u2:
254 if u2:
256 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
255 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
257 return u1, u2
256 return u1, u2
258
257
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        revs = [rev]
        if rev is None:
            # working context: use the parents' ancestry instead
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if n in node.wdirnodes:  # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    # memoize so repeated (f, n) lookups are cheap
    return util.lrucachefunc(makectx)
293
292
294 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
293 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
295 """combine partial copy paths"""
294 """combine partial copy paths"""
296 remainder = {}
295 remainder = {}
297 for f in copyfrom:
296 for f in copyfrom:
298 if f in copyto:
297 if f in copyto:
299 finalcopy[copyto[f]] = copyfrom[f]
298 finalcopy[copyto[f]] = copyfrom[f]
300 del copyto[f]
299 del copyto[f]
301 for f in incompletediverge:
300 for f in incompletediverge:
302 assert f not in diverge
301 assert f not in diverge
303 ic = incompletediverge[f]
302 ic = incompletediverge[f]
304 if ic[0] in copyto:
303 if ic[0] in copyto:
305 diverge[f] = [copyto[ic[0]], ic[1]]
304 diverge[f] = [copyto[ic[0]], ic[1]]
306 else:
305 else:
307 remainder[f] = ic
306 remainder[f] = ic
308 return remainder
307 return remainder
309
308
def mergecopies(repo, c1, c2, base):
    """
    The function calling different copytracing algorithms on the basis of config
    which find moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'off':
        return {}, {}, {}, {}, {}
    elif copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
379
378
380 def _isfullcopytraceable(repo, c1, base):
379 def _isfullcopytraceable(repo, c1, base):
381 """ Checks that if base, source and destination are all draft branches, if
380 """ Checks that if base, source and destination are all no-public branches,
382 yes let's use the full copytrace algorithm for increased capabilities since
381 if yes let's use the full copytrace algorithm for increased capabilities
383 it will be fast enough.
382 since it will be fast enough.
384 """
383 """
385 if c1.rev() is None:
384 if c1.rev() is None:
386 c1 = c1.p1()
385 c1 = c1.p1()
387
386 if c1.mutable() and base.mutable():
388 nonpublicphases = set([phases.draft, phases.secret])
389
390 if (c1.phase() in nonpublicphases) and (base.phase() in nonpublicphases):
391 sourcecommitlimit = repo.ui.configint('experimental',
387 sourcecommitlimit = repo.ui.configint('experimental',
392 'copytrace.sourcecommitlimit')
388 'copytrace.sourcecommitlimit')
393 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
389 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
394 return commits < sourcecommitlimit
390 return commits < sourcecommitlimit
395 return False
391 return False
396
392
397 def _fullcopytracing(repo, c1, c2, base):
393 def _fullcopytracing(repo, c1, c2, base):
398 """ The full copytracing algorithm which finds all the new files that were
394 """ The full copytracing algorithm which finds all the new files that were
399 added from merge base up to the top commit and for each file it checks if
395 added from merge base up to the top commit and for each file it checks if
400 this file was copied from another file.
396 this file was copied from another file.
401
397
402 This is pretty slow when a lot of changesets are involved but will track all
398 This is pretty slow when a lot of changesets are involved but will track all
403 the copies.
399 the copies.
404 """
400 """
405 # In certain scenarios (e.g. graft, update or rebase), base can be
401 # In certain scenarios (e.g. graft, update or rebase), base can be
406 # overridden We still need to know a real common ancestor in this case We
402 # overridden We still need to know a real common ancestor in this case We
407 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
403 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
408 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
404 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
409 # caller may not know if the revision passed in lieu of the CA is a genuine
405 # caller may not know if the revision passed in lieu of the CA is a genuine
410 # common ancestor or not without explicitly checking it, it's better to
406 # common ancestor or not without explicitly checking it, it's better to
411 # determine that here.
407 # determine that here.
412 #
408 #
413 # base.descendant(wc) and base.descendant(base) are False, work around that
409 # base.descendant(wc) and base.descendant(base) are False, work around that
414 _c1 = c1.p1() if c1.rev() is None else c1
410 _c1 = c1.p1() if c1.rev() is None else c1
415 _c2 = c2.p1() if c2.rev() is None else c2
411 _c2 = c2.p1() if c2.rev() is None else c2
416 # an endpoint is "dirty" if it isn't a descendant of the merge base
412 # an endpoint is "dirty" if it isn't a descendant of the merge base
417 # if we have a dirty endpoint, we need to trigger graft logic, and also
413 # if we have a dirty endpoint, we need to trigger graft logic, and also
418 # keep track of which endpoint is dirty
414 # keep track of which endpoint is dirty
419 dirtyc1 = not (base == _c1 or base.descendant(_c1))
415 dirtyc1 = not (base == _c1 or base.descendant(_c1))
420 dirtyc2 = not (base == _c2 or base.descendant(_c2))
416 dirtyc2 = not (base == _c2 or base.descendant(_c2))
421 graft = dirtyc1 or dirtyc2
417 graft = dirtyc1 or dirtyc2
422 tca = base
418 tca = base
423 if graft:
419 if graft:
424 tca = _c1.ancestor(_c2)
420 tca = _c1.ancestor(_c2)
425
421
426 limit = _findlimit(repo, c1.rev(), c2.rev())
422 limit = _findlimit(repo, c1.rev(), c2.rev())
427 if limit is None:
423 if limit is None:
428 # no common ancestor, no copies
424 # no common ancestor, no copies
429 return {}, {}, {}, {}, {}
425 return {}, {}, {}, {}, {}
430 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
426 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
431
427
432 m1 = c1.manifest()
428 m1 = c1.manifest()
433 m2 = c2.manifest()
429 m2 = c2.manifest()
434 mb = base.manifest()
430 mb = base.manifest()
435
431
436 # gather data from _checkcopies:
432 # gather data from _checkcopies:
437 # - diverge = record all diverges in this dict
433 # - diverge = record all diverges in this dict
438 # - copy = record all non-divergent copies in this dict
434 # - copy = record all non-divergent copies in this dict
439 # - fullcopy = record all copies in this dict
435 # - fullcopy = record all copies in this dict
440 # - incomplete = record non-divergent partial copies here
436 # - incomplete = record non-divergent partial copies here
441 # - incompletediverge = record divergent partial copies here
437 # - incompletediverge = record divergent partial copies here
442 diverge = {} # divergence data is shared
438 diverge = {} # divergence data is shared
443 incompletediverge = {}
439 incompletediverge = {}
444 data1 = {'copy': {},
440 data1 = {'copy': {},
445 'fullcopy': {},
441 'fullcopy': {},
446 'incomplete': {},
442 'incomplete': {},
447 'diverge': diverge,
443 'diverge': diverge,
448 'incompletediverge': incompletediverge,
444 'incompletediverge': incompletediverge,
449 }
445 }
450 data2 = {'copy': {},
446 data2 = {'copy': {},
451 'fullcopy': {},
447 'fullcopy': {},
452 'incomplete': {},
448 'incomplete': {},
453 'diverge': diverge,
449 'diverge': diverge,
454 'incompletediverge': incompletediverge,
450 'incompletediverge': incompletediverge,
455 }
451 }
456
452
457 # find interesting file sets from manifests
453 # find interesting file sets from manifests
458 addedinm1 = m1.filesnotin(mb)
454 addedinm1 = m1.filesnotin(mb)
459 addedinm2 = m2.filesnotin(mb)
455 addedinm2 = m2.filesnotin(mb)
460 bothnew = sorted(addedinm1 & addedinm2)
456 bothnew = sorted(addedinm1 & addedinm2)
461 if tca == base:
457 if tca == base:
462 # unmatched file from base
458 # unmatched file from base
463 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
459 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
464 u1u, u2u = u1r, u2r
460 u1u, u2u = u1r, u2r
465 else:
461 else:
466 # unmatched file from base (DAG rotation in the graft case)
462 # unmatched file from base (DAG rotation in the graft case)
467 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
463 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
468 baselabel='base')
464 baselabel='base')
469 # unmatched file from topological common ancestors (no DAG rotation)
465 # unmatched file from topological common ancestors (no DAG rotation)
470 # need to recompute this for directory move handling when grafting
466 # need to recompute this for directory move handling when grafting
471 mta = tca.manifest()
467 mta = tca.manifest()
472 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
468 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
473 m2.filesnotin(mta),
469 m2.filesnotin(mta),
474 baselabel='topological common ancestor')
470 baselabel='topological common ancestor')
475
471
476 for f in u1u:
472 for f in u1u:
477 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
473 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
478
474
479 for f in u2u:
475 for f in u2u:
480 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
476 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
481
477
482 copy = dict(data1['copy'])
478 copy = dict(data1['copy'])
483 copy.update(data2['copy'])
479 copy.update(data2['copy'])
484 fullcopy = dict(data1['fullcopy'])
480 fullcopy = dict(data1['fullcopy'])
485 fullcopy.update(data2['fullcopy'])
481 fullcopy.update(data2['fullcopy'])
486
482
487 if dirtyc1:
483 if dirtyc1:
488 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
484 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
489 incompletediverge)
485 incompletediverge)
490 else:
486 else:
491 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
487 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
492 incompletediverge)
488 incompletediverge)
493
489
494 renamedelete = {}
490 renamedelete = {}
495 renamedeleteset = set()
491 renamedeleteset = set()
496 divergeset = set()
492 divergeset = set()
497 for of, fl in list(diverge.items()):
493 for of, fl in list(diverge.items()):
498 if len(fl) == 1 or of in c1 or of in c2:
494 if len(fl) == 1 or of in c1 or of in c2:
499 del diverge[of] # not actually divergent, or not a rename
495 del diverge[of] # not actually divergent, or not a rename
500 if of not in c1 and of not in c2:
496 if of not in c1 and of not in c2:
501 # renamed on one side, deleted on the other side, but filter
497 # renamed on one side, deleted on the other side, but filter
502 # out files that have been renamed and then deleted
498 # out files that have been renamed and then deleted
503 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
499 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
504 renamedeleteset.update(fl) # reverse map for below
500 renamedeleteset.update(fl) # reverse map for below
505 else:
501 else:
506 divergeset.update(fl) # reverse map for below
502 divergeset.update(fl) # reverse map for below
507
503
508 if bothnew:
504 if bothnew:
509 repo.ui.debug(" unmatched files new in both:\n %s\n"
505 repo.ui.debug(" unmatched files new in both:\n %s\n"
510 % "\n ".join(bothnew))
506 % "\n ".join(bothnew))
511 bothdiverge = {}
507 bothdiverge = {}
512 bothincompletediverge = {}
508 bothincompletediverge = {}
513 remainder = {}
509 remainder = {}
514 both1 = {'copy': {},
510 both1 = {'copy': {},
515 'fullcopy': {},
511 'fullcopy': {},
516 'incomplete': {},
512 'incomplete': {},
517 'diverge': bothdiverge,
513 'diverge': bothdiverge,
518 'incompletediverge': bothincompletediverge
514 'incompletediverge': bothincompletediverge
519 }
515 }
520 both2 = {'copy': {},
516 both2 = {'copy': {},
521 'fullcopy': {},
517 'fullcopy': {},
522 'incomplete': {},
518 'incomplete': {},
523 'diverge': bothdiverge,
519 'diverge': bothdiverge,
524 'incompletediverge': bothincompletediverge
520 'incompletediverge': bothincompletediverge
525 }
521 }
526 for f in bothnew:
522 for f in bothnew:
527 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
523 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
528 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
524 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
529 if dirtyc1:
525 if dirtyc1:
530 # incomplete copies may only be found on the "dirty" side for bothnew
526 # incomplete copies may only be found on the "dirty" side for bothnew
531 assert not both2['incomplete']
527 assert not both2['incomplete']
532 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
528 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
533 bothincompletediverge)
529 bothincompletediverge)
534 elif dirtyc2:
530 elif dirtyc2:
535 assert not both1['incomplete']
531 assert not both1['incomplete']
536 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
532 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
537 bothincompletediverge)
533 bothincompletediverge)
538 else:
534 else:
539 # incomplete copies and divergences can't happen outside grafts
535 # incomplete copies and divergences can't happen outside grafts
540 assert not both1['incomplete']
536 assert not both1['incomplete']
541 assert not both2['incomplete']
537 assert not both2['incomplete']
542 assert not bothincompletediverge
538 assert not bothincompletediverge
543 for f in remainder:
539 for f in remainder:
544 assert f not in bothdiverge
540 assert f not in bothdiverge
545 ic = remainder[f]
541 ic = remainder[f]
546 if ic[0] in (m1 if dirtyc1 else m2):
542 if ic[0] in (m1 if dirtyc1 else m2):
547 # backed-out rename on one side, but watch out for deleted files
543 # backed-out rename on one side, but watch out for deleted files
548 bothdiverge[f] = ic
544 bothdiverge[f] = ic
549 for of, fl in bothdiverge.items():
545 for of, fl in bothdiverge.items():
550 if len(fl) == 2 and fl[0] == fl[1]:
546 if len(fl) == 2 and fl[0] == fl[1]:
551 copy[fl[0]] = of # not actually divergent, just matching renames
547 copy[fl[0]] = of # not actually divergent, just matching renames
552
548
553 if fullcopy and repo.ui.debugflag:
549 if fullcopy and repo.ui.debugflag:
554 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
550 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
555 "% = renamed and deleted):\n")
551 "% = renamed and deleted):\n")
556 for f in sorted(fullcopy):
552 for f in sorted(fullcopy):
557 note = ""
553 note = ""
558 if f in copy:
554 if f in copy:
559 note += "*"
555 note += "*"
560 if f in divergeset:
556 if f in divergeset:
561 note += "!"
557 note += "!"
562 if f in renamedeleteset:
558 if f in renamedeleteset:
563 note += "%"
559 note += "%"
564 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
560 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
565 note))
561 note))
566 del divergeset
562 del divergeset
567
563
568 if not fullcopy:
564 if not fullcopy:
569 return copy, {}, diverge, renamedelete, {}
565 return copy, {}, diverge, renamedelete, {}
570
566
571 repo.ui.debug(" checking for directory renames\n")
567 repo.ui.debug(" checking for directory renames\n")
572
568
573 # generate a directory move map
569 # generate a directory move map
574 d1, d2 = c1.dirs(), c2.dirs()
570 d1, d2 = c1.dirs(), c2.dirs()
575 # Hack for adding '', which is not otherwise added, to d1 and d2
571 # Hack for adding '', which is not otherwise added, to d1 and d2
576 d1.addpath('/')
572 d1.addpath('/')
577 d2.addpath('/')
573 d2.addpath('/')
578 invalid = set()
574 invalid = set()
579 dirmove = {}
575 dirmove = {}
580
576
581 # examine each file copy for a potential directory move, which is
577 # examine each file copy for a potential directory move, which is
582 # when all the files in a directory are moved to a new directory
578 # when all the files in a directory are moved to a new directory
583 for dst, src in fullcopy.iteritems():
579 for dst, src in fullcopy.iteritems():
584 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
580 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
585 if dsrc in invalid:
581 if dsrc in invalid:
586 # already seen to be uninteresting
582 # already seen to be uninteresting
587 continue
583 continue
588 elif dsrc in d1 and ddst in d1:
584 elif dsrc in d1 and ddst in d1:
589 # directory wasn't entirely moved locally
585 # directory wasn't entirely moved locally
590 invalid.add(dsrc + "/")
586 invalid.add(dsrc + "/")
591 elif dsrc in d2 and ddst in d2:
587 elif dsrc in d2 and ddst in d2:
592 # directory wasn't entirely moved remotely
588 # directory wasn't entirely moved remotely
593 invalid.add(dsrc + "/")
589 invalid.add(dsrc + "/")
594 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
590 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
595 # files from the same directory moved to two different places
591 # files from the same directory moved to two different places
596 invalid.add(dsrc + "/")
592 invalid.add(dsrc + "/")
597 else:
593 else:
598 # looks good so far
594 # looks good so far
599 dirmove[dsrc + "/"] = ddst + "/"
595 dirmove[dsrc + "/"] = ddst + "/"
600
596
601 for i in invalid:
597 for i in invalid:
602 if i in dirmove:
598 if i in dirmove:
603 del dirmove[i]
599 del dirmove[i]
604 del d1, d2, invalid
600 del d1, d2, invalid
605
601
606 if not dirmove:
602 if not dirmove:
607 return copy, {}, diverge, renamedelete, {}
603 return copy, {}, diverge, renamedelete, {}
608
604
609 for d in dirmove:
605 for d in dirmove:
610 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
606 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
611 (d, dirmove[d]))
607 (d, dirmove[d]))
612
608
613 movewithdir = {}
609 movewithdir = {}
614 # check unaccounted nonoverlapping files against directory moves
610 # check unaccounted nonoverlapping files against directory moves
615 for f in u1r + u2r:
611 for f in u1r + u2r:
616 if f not in fullcopy:
612 if f not in fullcopy:
617 for d in dirmove:
613 for d in dirmove:
618 if f.startswith(d):
614 if f.startswith(d):
619 # new file added in a directory that was moved, move it
615 # new file added in a directory that was moved, move it
620 df = dirmove[d] + f[len(d):]
616 df = dirmove[d] + f[len(d):]
621 if df not in copy:
617 if df not in copy:
622 movewithdir[f] = df
618 movewithdir[f] = df
623 repo.ui.debug((" pending file src: '%s' -> "
619 repo.ui.debug((" pending file src: '%s' -> "
624 "dst: '%s'\n") % (f, df))
620 "dst: '%s'\n") % (f, df))
625 break
621 break
626
622
627 return copy, movewithdir, diverge, renamedelete, dirmove
623 return copy, movewithdir, diverge, renamedelete, dirmove
628
624
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics
    """

    # working contexts have no usable rev; fall back to their first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base (exclusive) and c2
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    # NOTE: materialize a list here (not a lazy filter object) so that the
    # truth test below behaves the same on Python 3; on Python 3
    # `filter(...)` is always truthy even when it yields nothing.
    missingfiles = [f for f in changedfiles
                    if f not in m1 and f in base and f in c2]

    if missingfiles:
        # index the files added on the c1 side by basename and by dirname,
        # which are the two rename shapes this heuristic recognizes
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # in case of a rebase/graft, base may not be a common ancestor
        anc = c1.ancestor(c2)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2, anc.rev()):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
716
712
def _related(f1, f2, limit):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return f1  # a match

    ancit1 = f1.ancestors()
    ancit2 = f2.ancestors()
    try:
        rev1 = f1.linkrev()
        rev2 = f2.linkrev()

        # a workingfilectx has a linkrev of None; step past it onto the
        # first real ancestor before entering the comparison loop
        if rev1 is None:
            f1 = next(ancit1)
        if rev2 is None:
            f2 = next(ancit2)

        while True:
            rev1 = f1.linkrev()
            rev2 = f2.linkrev()
            if rev1 > rev2:
                # f1 is younger; walk it back toward f2
                f1 = next(ancit1)
                continue
            if rev2 > rev1:
                # f2 is younger; walk it back toward f1
                f2 = next(ancit2)
                continue
            # linkrevs are equal here: either a match, or unrelated
            if f1 == f2:
                return f1  # a match
            if rev1 == rev2 or rev1 < limit or rev2 < limit:
                return False  # copy no longer relevant
    except StopIteration:
        # ran out of ancestors on one side without meeting
        return False
750
746
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    Results are recorded by mutating the 'copy', 'fullcopy', 'incomplete',
    'diverge' and 'incompletediverge' sub-dicts of ``data``; nothing is
    returned.

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    # names already visited along the rename chain, to skip re-processing
    seen = {f}
    # walk the ancestors of f on the source side, following renames
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        ocr = oc.linkrev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2, tca.rev())
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                # record the divergence against the first seen name that
                # exists in the merge base
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

        if of in mta:
            if backwards or remotebase:
                # rename chain crosses the topological CA: only half of the
                # copy is visible from this side, record it as incomplete
                data['incomplete'][of] = f
            else:
                for sf in seen:
                    if sf in mb:
                        if tca == base:
                            data['diverge'].setdefault(sf, []).append(f)
                        else:
                            data['incompletediverge'][sf] = [of, f]
                        return
844
840
def duplicatecopies(repo, rev, fromrev, skiprev=None):
    '''reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    '''
    skipped = {}
    if skiprev is not None:
        # copytrace='off' skips only this pathcopies call, not the entire
        # function, because this call is O(size of the repo) during a rebase,
        # while the rest of the function is much faster (and is required for
        # carrying copy metadata across the rebase anyway).
        if repo.ui.config('experimental', 'copytrace') != 'off':
            skipped = pathcopies(repo[fromrev], repo[skiprev])
    allcopies = pathcopies(repo[fromrev], repo[rev])
    for dst, src in allcopies.iteritems():
        if dst in skipped:
            continue
        # pathcopies returns backward renames, so dst might not actually be
        # tracked in the dirstate; only record the copy when it is
        if repo.dirstate[dst] in "nma":
            repo.dirstate.copy(src, dst)
General Comments 0
You need to be logged in to leave comments. Login now