##// END OF EJS Templates
mergecopies: add logic to process incomplete data...
Gábor Stefanik -
r30202:a005c33d default
parent child Browse files
Show More
@@ -1,635 +1,688 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import heapq
10 import heapq
11
11
12 from . import (
12 from . import (
13 node,
13 node,
14 pathutil,
14 pathutil,
15 scmutil,
15 scmutil,
16 util,
16 util,
17 )
17 )
18
18
19 def _findlimit(repo, a, b):
19 def _findlimit(repo, a, b):
20 """
20 """
21 Find the last revision that needs to be checked to ensure that a full
21 Find the last revision that needs to be checked to ensure that a full
22 transitive closure for file copies can be properly calculated.
22 transitive closure for file copies can be properly calculated.
23 Generally, this means finding the earliest revision number that's an
23 Generally, this means finding the earliest revision number that's an
 24 ancestor of a or b but not both, except when a or b is a direct descendant
 24 ancestor of a or b but not both, except when a or b is a direct descendant
25 of the other, in which case we can return the minimum revnum of a and b.
25 of the other, in which case we can return the minimum revnum of a and b.
26 None if no such revision exists.
26 None if no such revision exists.
27 """
27 """
28
28
29 # basic idea:
29 # basic idea:
30 # - mark a and b with different sides
30 # - mark a and b with different sides
31 # - if a parent's children are all on the same side, the parent is
31 # - if a parent's children are all on the same side, the parent is
32 # on that side, otherwise it is on no side
32 # on that side, otherwise it is on no side
33 # - walk the graph in topological order with the help of a heap;
33 # - walk the graph in topological order with the help of a heap;
34 # - add unseen parents to side map
34 # - add unseen parents to side map
35 # - clear side of any parent that has children on different sides
35 # - clear side of any parent that has children on different sides
36 # - track number of interesting revs that might still be on a side
36 # - track number of interesting revs that might still be on a side
37 # - track the lowest interesting rev seen
37 # - track the lowest interesting rev seen
38 # - quit when interesting revs is zero
38 # - quit when interesting revs is zero
39
39
40 cl = repo.changelog
40 cl = repo.changelog
41 working = len(cl) # pseudo rev for the working directory
41 working = len(cl) # pseudo rev for the working directory
42 if a is None:
42 if a is None:
43 a = working
43 a = working
44 if b is None:
44 if b is None:
45 b = working
45 b = working
46
46
47 side = {a: -1, b: 1}
47 side = {a: -1, b: 1}
48 visit = [-a, -b]
48 visit = [-a, -b]
49 heapq.heapify(visit)
49 heapq.heapify(visit)
50 interesting = len(visit)
50 interesting = len(visit)
51 hascommonancestor = False
51 hascommonancestor = False
52 limit = working
52 limit = working
53
53
54 while interesting:
54 while interesting:
55 r = -heapq.heappop(visit)
55 r = -heapq.heappop(visit)
56 if r == working:
56 if r == working:
57 parents = [cl.rev(p) for p in repo.dirstate.parents()]
57 parents = [cl.rev(p) for p in repo.dirstate.parents()]
58 else:
58 else:
59 parents = cl.parentrevs(r)
59 parents = cl.parentrevs(r)
60 for p in parents:
60 for p in parents:
61 if p < 0:
61 if p < 0:
62 continue
62 continue
63 if p not in side:
63 if p not in side:
64 # first time we see p; add it to visit
64 # first time we see p; add it to visit
65 side[p] = side[r]
65 side[p] = side[r]
66 if side[p]:
66 if side[p]:
67 interesting += 1
67 interesting += 1
68 heapq.heappush(visit, -p)
68 heapq.heappush(visit, -p)
69 elif side[p] and side[p] != side[r]:
69 elif side[p] and side[p] != side[r]:
70 # p was interesting but now we know better
70 # p was interesting but now we know better
71 side[p] = 0
71 side[p] = 0
72 interesting -= 1
72 interesting -= 1
73 hascommonancestor = True
73 hascommonancestor = True
74 if side[r]:
74 if side[r]:
75 limit = r # lowest rev visited
75 limit = r # lowest rev visited
76 interesting -= 1
76 interesting -= 1
77
77
78 if not hascommonancestor:
78 if not hascommonancestor:
79 return None
79 return None
80
80
81 # Consider the following flow (see test-commit-amend.t under issue4405):
81 # Consider the following flow (see test-commit-amend.t under issue4405):
82 # 1/ File 'a0' committed
82 # 1/ File 'a0' committed
83 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
83 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
84 # 3/ Move back to first commit
84 # 3/ Move back to first commit
85 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
85 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
86 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
86 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
87 #
87 #
88 # During the amend in step five, we will be in this state:
88 # During the amend in step five, we will be in this state:
89 #
89 #
90 # @ 3 temporary amend commit for a1-amend
90 # @ 3 temporary amend commit for a1-amend
91 # |
91 # |
92 # o 2 a1-amend
92 # o 2 a1-amend
93 # |
93 # |
94 # | o 1 a1
94 # | o 1 a1
95 # |/
95 # |/
96 # o 0 a0
96 # o 0 a0
97 #
97 #
98 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
98 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
99 # yet the filelog has the copy information in rev 1 and we will not look
99 # yet the filelog has the copy information in rev 1 and we will not look
100 # back far enough unless we also look at the a and b as candidates.
100 # back far enough unless we also look at the a and b as candidates.
101 # This only occurs when a is a descendant of b or vice versa.
101 # This only occurs when a is a descendant of b or vice versa.
102 return min(limit, a, b)
102 return min(limit, a, b)
103
103
104 def _chain(src, dst, a, b):
104 def _chain(src, dst, a, b):
105 '''chain two sets of copies a->b'''
105 '''chain two sets of copies a->b'''
106 t = a.copy()
106 t = a.copy()
107 for k, v in b.iteritems():
107 for k, v in b.iteritems():
108 if v in t:
108 if v in t:
109 # found a chain
109 # found a chain
110 if t[v] != k:
110 if t[v] != k:
111 # file wasn't renamed back to itself
111 # file wasn't renamed back to itself
112 t[k] = t[v]
112 t[k] = t[v]
113 if v not in dst:
113 if v not in dst:
114 # chain was a rename, not a copy
114 # chain was a rename, not a copy
115 del t[v]
115 del t[v]
116 if v in src:
116 if v in src:
117 # file is a copy of an existing file
117 # file is a copy of an existing file
118 t[k] = v
118 t[k] = v
119
119
120 # remove criss-crossed copies
120 # remove criss-crossed copies
121 for k, v in t.items():
121 for k, v in t.items():
122 if k in src and v in dst:
122 if k in src and v in dst:
123 del t[k]
123 del t[k]
124
124
125 return t
125 return t
126
126
127 def _tracefile(fctx, am, limit=-1):
127 def _tracefile(fctx, am, limit=-1):
128 '''return file context that is the ancestor of fctx present in ancestor
128 '''return file context that is the ancestor of fctx present in ancestor
129 manifest am, stopping after the first ancestor lower than limit'''
129 manifest am, stopping after the first ancestor lower than limit'''
130
130
131 for f in fctx.ancestors():
131 for f in fctx.ancestors():
132 if am.get(f.path(), None) == f.filenode():
132 if am.get(f.path(), None) == f.filenode():
133 return f
133 return f
134 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
134 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
135 return None
135 return None
136
136
137 def _dirstatecopies(d):
137 def _dirstatecopies(d):
138 ds = d._repo.dirstate
138 ds = d._repo.dirstate
139 c = ds.copies().copy()
139 c = ds.copies().copy()
140 for k in c.keys():
140 for k in c.keys():
141 if ds[k] not in 'anm':
141 if ds[k] not in 'anm':
142 del c[k]
142 del c[k]
143 return c
143 return c
144
144
145 def _computeforwardmissing(a, b, match=None):
145 def _computeforwardmissing(a, b, match=None):
146 """Computes which files are in b but not a.
146 """Computes which files are in b but not a.
147 This is its own function so extensions can easily wrap this call to see what
147 This is its own function so extensions can easily wrap this call to see what
148 files _forwardcopies is about to process.
148 files _forwardcopies is about to process.
149 """
149 """
150 ma = a.manifest()
150 ma = a.manifest()
151 mb = b.manifest()
151 mb = b.manifest()
152 if match:
152 if match:
153 ma = ma.matches(match)
153 ma = ma.matches(match)
154 mb = mb.matches(match)
154 mb = mb.matches(match)
155 return mb.filesnotin(ma)
155 return mb.filesnotin(ma)
156
156
157 def _forwardcopies(a, b, match=None):
157 def _forwardcopies(a, b, match=None):
158 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
158 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
159
159
160 # check for working copy
160 # check for working copy
161 w = None
161 w = None
162 if b.rev() is None:
162 if b.rev() is None:
163 w = b
163 w = b
164 b = w.p1()
164 b = w.p1()
165 if a == b:
165 if a == b:
166 # short-circuit to avoid issues with merge states
166 # short-circuit to avoid issues with merge states
167 return _dirstatecopies(w)
167 return _dirstatecopies(w)
168
168
169 # files might have to be traced back to the fctx parent of the last
169 # files might have to be traced back to the fctx parent of the last
170 # one-side-only changeset, but not further back than that
170 # one-side-only changeset, but not further back than that
171 limit = _findlimit(a._repo, a.rev(), b.rev())
171 limit = _findlimit(a._repo, a.rev(), b.rev())
172 if limit is None:
172 if limit is None:
173 limit = -1
173 limit = -1
174 am = a.manifest()
174 am = a.manifest()
175
175
176 # find where new files came from
176 # find where new files came from
177 # we currently don't try to find where old files went, too expensive
177 # we currently don't try to find where old files went, too expensive
178 # this means we can miss a case like 'hg rm b; hg cp a b'
178 # this means we can miss a case like 'hg rm b; hg cp a b'
179 cm = {}
179 cm = {}
180
180
181 # Computing the forward missing is quite expensive on large manifests, since
181 # Computing the forward missing is quite expensive on large manifests, since
182 # it compares the entire manifests. We can optimize it in the common use
182 # it compares the entire manifests. We can optimize it in the common use
183 # case of computing what copies are in a commit versus its parent (like
183 # case of computing what copies are in a commit versus its parent (like
184 # during a rebase or histedit). Note, we exclude merge commits from this
184 # during a rebase or histedit). Note, we exclude merge commits from this
185 # optimization, since the ctx.files() for a merge commit is not correct for
185 # optimization, since the ctx.files() for a merge commit is not correct for
186 # this comparison.
186 # this comparison.
187 forwardmissingmatch = match
187 forwardmissingmatch = match
188 if not match and b.p1() == a and b.p2().node() == node.nullid:
188 if not match and b.p1() == a and b.p2().node() == node.nullid:
189 forwardmissingmatch = scmutil.matchfiles(a._repo, b.files())
189 forwardmissingmatch = scmutil.matchfiles(a._repo, b.files())
190 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
190 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
191
191
192 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
192 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
193 for f in missing:
193 for f in missing:
194 fctx = b[f]
194 fctx = b[f]
195 fctx._ancestrycontext = ancestrycontext
195 fctx._ancestrycontext = ancestrycontext
196 ofctx = _tracefile(fctx, am, limit)
196 ofctx = _tracefile(fctx, am, limit)
197 if ofctx:
197 if ofctx:
198 cm[f] = ofctx.path()
198 cm[f] = ofctx.path()
199
199
200 # combine copies from dirstate if necessary
200 # combine copies from dirstate if necessary
201 if w is not None:
201 if w is not None:
202 cm = _chain(a, w, cm, _dirstatecopies(w))
202 cm = _chain(a, w, cm, _dirstatecopies(w))
203
203
204 return cm
204 return cm
205
205
206 def _backwardrenames(a, b):
206 def _backwardrenames(a, b):
207 if a._repo.ui.configbool('experimental', 'disablecopytrace'):
207 if a._repo.ui.configbool('experimental', 'disablecopytrace'):
208 return {}
208 return {}
209
209
210 # Even though we're not taking copies into account, 1:n rename situations
210 # Even though we're not taking copies into account, 1:n rename situations
211 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
211 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
212 # arbitrarily pick one of the renames.
212 # arbitrarily pick one of the renames.
213 f = _forwardcopies(b, a)
213 f = _forwardcopies(b, a)
214 r = {}
214 r = {}
215 for k, v in sorted(f.iteritems()):
215 for k, v in sorted(f.iteritems()):
216 # remove copies
216 # remove copies
217 if v in a:
217 if v in a:
218 continue
218 continue
219 r[v] = k
219 r[v] = k
220 return r
220 return r
221
221
222 def pathcopies(x, y, match=None):
222 def pathcopies(x, y, match=None):
223 '''find {dst@y: src@x} copy mapping for directed compare'''
223 '''find {dst@y: src@x} copy mapping for directed compare'''
224 if x == y or not x or not y:
224 if x == y or not x or not y:
225 return {}
225 return {}
226 a = y.ancestor(x)
226 a = y.ancestor(x)
227 if a == x:
227 if a == x:
228 return _forwardcopies(x, y, match=match)
228 return _forwardcopies(x, y, match=match)
229 if a == y:
229 if a == y:
230 return _backwardrenames(x, y)
230 return _backwardrenames(x, y)
231 return _chain(x, y, _backwardrenames(x, a),
231 return _chain(x, y, _backwardrenames(x, a),
232 _forwardcopies(a, y, match=match))
232 _forwardcopies(a, y, match=match))
233
233
234 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
234 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
235 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
235 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
236 and c2. This is its own function so extensions can easily wrap this call
236 and c2. This is its own function so extensions can easily wrap this call
237 to see what files mergecopies is about to process.
237 to see what files mergecopies is about to process.
238
238
239 Even though c1 and c2 are not used in this function, they are useful in
239 Even though c1 and c2 are not used in this function, they are useful in
240 other extensions for being able to read the file nodes of the changed files.
240 other extensions for being able to read the file nodes of the changed files.
241
241
242 "baselabel" can be passed to help distinguish the multiple computations
242 "baselabel" can be passed to help distinguish the multiple computations
243 done in the graft case.
243 done in the graft case.
244 """
244 """
245 u1 = sorted(addedinm1 - addedinm2)
245 u1 = sorted(addedinm1 - addedinm2)
246 u2 = sorted(addedinm2 - addedinm1)
246 u2 = sorted(addedinm2 - addedinm1)
247
247
248 header = " unmatched files in %s"
248 header = " unmatched files in %s"
249 if baselabel:
249 if baselabel:
250 header += ' (from %s)' % baselabel
250 header += ' (from %s)' % baselabel
251 if u1:
251 if u1:
252 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
252 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
253 if u2:
253 if u2:
254 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
254 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
255 return u1, u2
255 return u1, u2
256
256
257 def _makegetfctx(ctx):
257 def _makegetfctx(ctx):
258 """return a 'getfctx' function suitable for _checkcopies usage
258 """return a 'getfctx' function suitable for _checkcopies usage
259
259
260 We have to re-setup the function building 'filectx' for each
260 We have to re-setup the function building 'filectx' for each
261 '_checkcopies' to ensure the linkrev adjustment is properly setup for
261 '_checkcopies' to ensure the linkrev adjustment is properly setup for
262 each. Linkrev adjustment is important to avoid bug in rename
262 each. Linkrev adjustment is important to avoid bug in rename
263 detection. Moreover, having a proper '_ancestrycontext' setup ensures
263 detection. Moreover, having a proper '_ancestrycontext' setup ensures
264 the performance impact of this adjustment is kept limited. Without it,
264 the performance impact of this adjustment is kept limited. Without it,
265 each file could do a full dag traversal making the time complexity of
265 each file could do a full dag traversal making the time complexity of
266 the operation explode (see issue4537).
266 the operation explode (see issue4537).
267
267
268 This function exists here mostly to limit the impact on stable. Feel
268 This function exists here mostly to limit the impact on stable. Feel
269 free to refactor on default.
269 free to refactor on default.
270 """
270 """
271 rev = ctx.rev()
271 rev = ctx.rev()
272 repo = ctx._repo
272 repo = ctx._repo
273 ac = getattr(ctx, '_ancestrycontext', None)
273 ac = getattr(ctx, '_ancestrycontext', None)
274 if ac is None:
274 if ac is None:
275 revs = [rev]
275 revs = [rev]
276 if rev is None:
276 if rev is None:
277 revs = [p.rev() for p in ctx.parents()]
277 revs = [p.rev() for p in ctx.parents()]
278 ac = repo.changelog.ancestors(revs, inclusive=True)
278 ac = repo.changelog.ancestors(revs, inclusive=True)
279 ctx._ancestrycontext = ac
279 ctx._ancestrycontext = ac
280 def makectx(f, n):
280 def makectx(f, n):
281 if len(n) != 20: # in a working context?
281 if len(n) != 20: # in a working context?
282 if ctx.rev() is None:
282 if ctx.rev() is None:
283 return ctx.filectx(f)
283 return ctx.filectx(f)
284 return repo[None][f]
284 return repo[None][f]
285 fctx = repo.filectx(f, fileid=n)
285 fctx = repo.filectx(f, fileid=n)
286 # setup only needed for filectx not create from a changectx
286 # setup only needed for filectx not create from a changectx
287 fctx._ancestrycontext = ac
287 fctx._ancestrycontext = ac
288 fctx._descendantrev = rev
288 fctx._descendantrev = rev
289 return fctx
289 return fctx
290 return util.lrucachefunc(makectx)
290 return util.lrucachefunc(makectx)
291
291
292 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
293 """combine partial copy paths"""
294 remainder = {}
295 for f in copyfrom:
296 if f in copyto:
297 finalcopy[copyto[f]] = copyfrom[f]
298 del copyto[f]
299 for f in incompletediverge:
300 assert f not in diverge
301 ic = incompletediverge[f]
302 if ic[0] in copyto:
303 diverge[f] = [copyto[ic[0]], ic[1]]
304 else:
305 remainder[f] = ic
306 return remainder
307
292 def mergecopies(repo, c1, c2, base):
308 def mergecopies(repo, c1, c2, base):
293 """
309 """
294 Find moves and copies between context c1 and c2 that are relevant
310 Find moves and copies between context c1 and c2 that are relevant
295 for merging. 'base' will be used as the merge base.
311 for merging. 'base' will be used as the merge base.
296
312
297 Returns four dicts: "copy", "movewithdir", "diverge", and
313 Returns four dicts: "copy", "movewithdir", "diverge", and
298 "renamedelete".
314 "renamedelete".
299
315
300 "copy" is a mapping from destination name -> source name,
316 "copy" is a mapping from destination name -> source name,
301 where source is in c1 and destination is in c2 or vice-versa.
317 where source is in c1 and destination is in c2 or vice-versa.
302
318
303 "movewithdir" is a mapping from source name -> destination name,
319 "movewithdir" is a mapping from source name -> destination name,
304 where the file at source present in one context but not the other
320 where the file at source present in one context but not the other
305 needs to be moved to destination by the merge process, because the
321 needs to be moved to destination by the merge process, because the
306 other context moved the directory it is in.
322 other context moved the directory it is in.
307
323
308 "diverge" is a mapping of source name -> list of destination names
324 "diverge" is a mapping of source name -> list of destination names
309 for divergent renames.
325 for divergent renames.
310
326
311 "renamedelete" is a mapping of source name -> list of destination
327 "renamedelete" is a mapping of source name -> list of destination
312 names for files deleted in c1 that were renamed in c2 or vice-versa.
328 names for files deleted in c1 that were renamed in c2 or vice-versa.
313 """
329 """
314 # avoid silly behavior for update from empty dir
330 # avoid silly behavior for update from empty dir
315 if not c1 or not c2 or c1 == c2:
331 if not c1 or not c2 or c1 == c2:
316 return {}, {}, {}, {}
332 return {}, {}, {}, {}
317
333
318 # avoid silly behavior for parent -> working dir
334 # avoid silly behavior for parent -> working dir
319 if c2.node() is None and c1.node() == repo.dirstate.p1():
335 if c2.node() is None and c1.node() == repo.dirstate.p1():
320 return repo.dirstate.copies(), {}, {}, {}
336 return repo.dirstate.copies(), {}, {}, {}
321
337
322 # Copy trace disabling is explicitly below the node == p1 logic above
338 # Copy trace disabling is explicitly below the node == p1 logic above
323 # because the logic above is required for a simple copy to be kept across a
339 # because the logic above is required for a simple copy to be kept across a
324 # rebase.
340 # rebase.
325 if repo.ui.configbool('experimental', 'disablecopytrace'):
341 if repo.ui.configbool('experimental', 'disablecopytrace'):
326 return {}, {}, {}, {}
342 return {}, {}, {}, {}
327
343
328 # In certain scenarios (e.g. graft, update or rebase), base can be
344 # In certain scenarios (e.g. graft, update or rebase), base can be
329 # overridden. We still need to know a real common ancestor in this case. We
345 # overridden. We still need to know a real common ancestor in this case. We
330 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
346 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
331 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
347 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
332 # caller may not know if the revision passed in lieu of the CA is a genuine
348 # caller may not know if the revision passed in lieu of the CA is a genuine
333 # common ancestor or not without explicitly checking it, it's better to
349 # common ancestor or not without explicitly checking it, it's better to
334 # determine that here.
350 # determine that here.
335 #
351 #
336 # base.descendant(wc) and base.descendant(base) are False, work around that
352 # base.descendant(wc) and base.descendant(base) are False, work around that
337 _c1 = c1.p1() if c1.rev() is None else c1
353 _c1 = c1.p1() if c1.rev() is None else c1
338 _c2 = c2.p1() if c2.rev() is None else c2
354 _c2 = c2.p1() if c2.rev() is None else c2
339 # an endpoint is "dirty" if it isn't a descendant of the merge base
355 # an endpoint is "dirty" if it isn't a descendant of the merge base
340 # if we have a dirty endpoint, we need to trigger graft logic, and also
356 # if we have a dirty endpoint, we need to trigger graft logic, and also
341 # keep track of which endpoint is dirty
357 # keep track of which endpoint is dirty
342 dirtyc1 = not (base == _c1 or base.descendant(_c1))
358 dirtyc1 = not (base == _c1 or base.descendant(_c1))
343 dirtyc2 = not (base== _c2 or base.descendant(_c2))
359 dirtyc2 = not (base== _c2 or base.descendant(_c2))
344 graft = dirtyc1 or dirtyc2
360 graft = dirtyc1 or dirtyc2
345 tca = base
361 tca = base
346 if graft:
362 if graft:
347 tca = _c1.ancestor(_c2)
363 tca = _c1.ancestor(_c2)
348
364
349 limit = _findlimit(repo, c1.rev(), c2.rev())
365 limit = _findlimit(repo, c1.rev(), c2.rev())
350 if limit is None:
366 if limit is None:
351 # no common ancestor, no copies
367 # no common ancestor, no copies
352 return {}, {}, {}, {}
368 return {}, {}, {}, {}
353 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
369 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
354
370
355 m1 = c1.manifest()
371 m1 = c1.manifest()
356 m2 = c2.manifest()
372 m2 = c2.manifest()
357 mb = base.manifest()
373 mb = base.manifest()
358
374
359 # gather data from _checkcopies:
375 # gather data from _checkcopies:
360 # - diverge = record all diverges in this dict
376 # - diverge = record all diverges in this dict
361 # - copy = record all non-divergent copies in this dict
377 # - copy = record all non-divergent copies in this dict
362 # - fullcopy = record all copies in this dict
378 # - fullcopy = record all copies in this dict
379 # - incomplete = record non-divergent partial copies here
380 # - incompletediverge = record divergent partial copies here
363 diverge = {} # divergence data is shared
381 diverge = {} # divergence data is shared
382 incompletediverge = {}
364 data1 = {'copy': {},
383 data1 = {'copy': {},
365 'fullcopy': {},
384 'fullcopy': {},
385 'incomplete': {},
366 'diverge': diverge,
386 'diverge': diverge,
387 'incompletediverge': incompletediverge,
367 }
388 }
368 data2 = {'copy': {},
389 data2 = {'copy': {},
369 'fullcopy': {},
390 'fullcopy': {},
391 'incomplete': {},
370 'diverge': diverge,
392 'diverge': diverge,
393 'incompletediverge': incompletediverge,
371 }
394 }
372
395
373 # find interesting file sets from manifests
396 # find interesting file sets from manifests
374 addedinm1 = m1.filesnotin(mb)
397 addedinm1 = m1.filesnotin(mb)
375 addedinm2 = m2.filesnotin(mb)
398 addedinm2 = m2.filesnotin(mb)
376 bothnew = sorted(addedinm1 & addedinm2)
399 bothnew = sorted(addedinm1 & addedinm2)
377 if tca == base:
400 if tca == base:
378 # unmatched file from base
401 # unmatched file from base
379 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
402 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
380 u1u, u2u = u1r, u2r
403 u1u, u2u = u1r, u2r
381 else:
404 else:
382 # unmatched file from base (DAG rotation in the graft case)
405 # unmatched file from base (DAG rotation in the graft case)
383 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
406 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
384 baselabel='base')
407 baselabel='base')
385 # unmatched file from topological common ancestors (no DAG rotation)
408 # unmatched file from topological common ancestors (no DAG rotation)
386 # need to recompute this for directory move handling when grafting
409 # need to recompute this for directory move handling when grafting
387 mta = tca.manifest()
410 mta = tca.manifest()
388 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
411 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
389 m2.filesnotin(mta),
412 m2.filesnotin(mta),
390 baselabel='topological common ancestor')
413 baselabel='topological common ancestor')
391
414
392 for f in u1u:
415 for f in u1u:
393 _checkcopies(c1, f, m1, m2, base, tca, limit, data1)
416 _checkcopies(c1, f, m1, m2, base, tca, limit, data1)
394
417
395 for f in u2u:
418 for f in u2u:
396 _checkcopies(c2, f, m2, m1, base, tca, limit, data2)
419 _checkcopies(c2, f, m2, m1, base, tca, limit, data2)
397
420
398 copy = dict(data1['copy'].items() + data2['copy'].items())
421 copy = dict(data1['copy'].items() + data2['copy'].items())
399 fullcopy = dict(data1['fullcopy'].items() + data2['fullcopy'].items())
422 fullcopy = dict(data1['fullcopy'].items() + data2['fullcopy'].items())
400
423
424 if dirtyc1:
425 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
426 incompletediverge)
427 else:
428 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
429 incompletediverge)
430
401 renamedelete = {}
431 renamedelete = {}
402 renamedeleteset = set()
432 renamedeleteset = set()
403 divergeset = set()
433 divergeset = set()
404 for of, fl in diverge.items():
434 for of, fl in diverge.items():
405 if len(fl) == 1 or of in c1 or of in c2:
435 if len(fl) == 1 or of in c1 or of in c2:
406 del diverge[of] # not actually divergent, or not a rename
436 del diverge[of] # not actually divergent, or not a rename
407 if of not in c1 and of not in c2:
437 if of not in c1 and of not in c2:
408 # renamed on one side, deleted on the other side, but filter
438 # renamed on one side, deleted on the other side, but filter
409 # out files that have been renamed and then deleted
439 # out files that have been renamed and then deleted
410 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
440 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
411 renamedeleteset.update(fl) # reverse map for below
441 renamedeleteset.update(fl) # reverse map for below
412 else:
442 else:
413 divergeset.update(fl) # reverse map for below
443 divergeset.update(fl) # reverse map for below
414
444
415 if bothnew:
445 if bothnew:
416 repo.ui.debug(" unmatched files new in both:\n %s\n"
446 repo.ui.debug(" unmatched files new in both:\n %s\n"
417 % "\n ".join(bothnew))
447 % "\n ".join(bothnew))
418 bothdiverge = {}
448 bothdiverge = {}
419 bothdata = {'copy': {},
449 bothincompletediverge = {}
450 both1 = {'copy': {},
420 'fullcopy': {},
451 'fullcopy': {},
452 'incomplete': {},
421 'diverge': bothdiverge,
453 'diverge': bothdiverge,
454 'incompletediverge': bothincompletediverge
455 }
456 both2 = {'copy': {},
457 'fullcopy': {},
458 'incomplete': {},
459 'diverge': bothdiverge,
460 'incompletediverge': bothincompletediverge
422 }
461 }
423 for f in bothnew:
462 for f in bothnew:
424 _checkcopies(c1, f, m1, m2, base, tca, limit, bothdata)
463 _checkcopies(c1, f, m1, m2, base, tca, limit, both1)
425 _checkcopies(c2, f, m2, m1, base, tca, limit, bothdata)
464 _checkcopies(c2, f, m2, m1, base, tca, limit, both2)
465 if dirtyc1:
466 assert both2['incomplete'] == {}
467 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
468 bothincompletediverge)
469 else:
470 assert both1['incomplete'] == {}
471 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
472 bothincompletediverge)
473 for f in remainder:
474 assert f not in bothdiverge
475 ic = remainder[f]
476 if ic[0] in (m1 if dirtyc1 else m2):
477 # backed-out rename on one side, but watch out for deleted files
478 bothdiverge[f] = ic
426 for of, fl in bothdiverge.items():
479 for of, fl in bothdiverge.items():
427 if len(fl) == 2 and fl[0] == fl[1]:
480 if len(fl) == 2 and fl[0] == fl[1]:
428 copy[fl[0]] = of # not actually divergent, just matching renames
481 copy[fl[0]] = of # not actually divergent, just matching renames
429
482
430 if fullcopy and repo.ui.debugflag:
483 if fullcopy and repo.ui.debugflag:
431 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
484 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
432 "% = renamed and deleted):\n")
485 "% = renamed and deleted):\n")
433 for f in sorted(fullcopy):
486 for f in sorted(fullcopy):
434 note = ""
487 note = ""
435 if f in copy:
488 if f in copy:
436 note += "*"
489 note += "*"
437 if f in divergeset:
490 if f in divergeset:
438 note += "!"
491 note += "!"
439 if f in renamedeleteset:
492 if f in renamedeleteset:
440 note += "%"
493 note += "%"
441 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
494 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
442 note))
495 note))
443 del divergeset
496 del divergeset
444
497
445 if not fullcopy:
498 if not fullcopy:
446 return copy, {}, diverge, renamedelete
499 return copy, {}, diverge, renamedelete
447
500
448 repo.ui.debug(" checking for directory renames\n")
501 repo.ui.debug(" checking for directory renames\n")
449
502
450 # generate a directory move map
503 # generate a directory move map
451 d1, d2 = c1.dirs(), c2.dirs()
504 d1, d2 = c1.dirs(), c2.dirs()
452 # Hack for adding '', which is not otherwise added, to d1 and d2
505 # Hack for adding '', which is not otherwise added, to d1 and d2
453 d1.addpath('/')
506 d1.addpath('/')
454 d2.addpath('/')
507 d2.addpath('/')
455 invalid = set()
508 invalid = set()
456 dirmove = {}
509 dirmove = {}
457
510
458 # examine each file copy for a potential directory move, which is
511 # examine each file copy for a potential directory move, which is
459 # when all the files in a directory are moved to a new directory
512 # when all the files in a directory are moved to a new directory
460 for dst, src in fullcopy.iteritems():
513 for dst, src in fullcopy.iteritems():
461 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
514 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
462 if dsrc in invalid:
515 if dsrc in invalid:
463 # already seen to be uninteresting
516 # already seen to be uninteresting
464 continue
517 continue
465 elif dsrc in d1 and ddst in d1:
518 elif dsrc in d1 and ddst in d1:
466 # directory wasn't entirely moved locally
519 # directory wasn't entirely moved locally
467 invalid.add(dsrc + "/")
520 invalid.add(dsrc + "/")
468 elif dsrc in d2 and ddst in d2:
521 elif dsrc in d2 and ddst in d2:
469 # directory wasn't entirely moved remotely
522 # directory wasn't entirely moved remotely
470 invalid.add(dsrc + "/")
523 invalid.add(dsrc + "/")
471 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
524 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
472 # files from the same directory moved to two different places
525 # files from the same directory moved to two different places
473 invalid.add(dsrc + "/")
526 invalid.add(dsrc + "/")
474 else:
527 else:
475 # looks good so far
528 # looks good so far
476 dirmove[dsrc + "/"] = ddst + "/"
529 dirmove[dsrc + "/"] = ddst + "/"
477
530
478 for i in invalid:
531 for i in invalid:
479 if i in dirmove:
532 if i in dirmove:
480 del dirmove[i]
533 del dirmove[i]
481 del d1, d2, invalid
534 del d1, d2, invalid
482
535
483 if not dirmove:
536 if not dirmove:
484 return copy, {}, diverge, renamedelete
537 return copy, {}, diverge, renamedelete
485
538
486 for d in dirmove:
539 for d in dirmove:
487 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
540 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
488 (d, dirmove[d]))
541 (d, dirmove[d]))
489
542
490 movewithdir = {}
543 movewithdir = {}
491 # check unaccounted nonoverlapping files against directory moves
544 # check unaccounted nonoverlapping files against directory moves
492 for f in u1r + u2r:
545 for f in u1r + u2r:
493 if f not in fullcopy:
546 if f not in fullcopy:
494 for d in dirmove:
547 for d in dirmove:
495 if f.startswith(d):
548 if f.startswith(d):
496 # new file added in a directory that was moved, move it
549 # new file added in a directory that was moved, move it
497 df = dirmove[d] + f[len(d):]
550 df = dirmove[d] + f[len(d):]
498 if df not in copy:
551 if df not in copy:
499 movewithdir[f] = df
552 movewithdir[f] = df
500 repo.ui.debug((" pending file src: '%s' -> "
553 repo.ui.debug((" pending file src: '%s' -> "
501 "dst: '%s'\n") % (f, df))
554 "dst: '%s'\n") % (f, df))
502 break
555 break
503
556
504 return copy, movewithdir, diverge, renamedelete
557 return copy, movewithdir, diverge, renamedelete
505
558
def _related(f1, f2, limit):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).

    On a match the common filectx (a truthy value) is returned rather
    than the literal True; False is returned when the files are
    unrelated or the walk leaves the relevant revision range.
    """

    if f1 == f2:
        return f1 # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        # workingfilectx linkrevs are None; step to the first real
        # ancestor so the integer comparisons below are valid
        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            # step back whichever side sits at the higher revision so
            # the two walks converge on a potential common ancestor
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            elif f1 == f2:
                return f1 # a match
            elif f1r == f2r or f1r < limit or f2r < limit:
                return False # copy no longer relevant
    except StopIteration:
        # one side ran out of ancestors without finding a match
        return False
539
592
def _checkcopies(ctx, f, m1, m2, base, tca, limit, data):
    """
    check possible copies of f from m1 to m2

    ctx = starting context for f in m1
    f = the filename to check (as in m1)
    m1 = the source manifest
    m2 = the destination manifest
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    Nothing is returned; results are accumulated into data['copy'],
    data['fullcopy'] and data['diverge'].

    note: limit is only an optimization, and there is no guarantee that
    irrelevant revisions will not be limited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    mb = base.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = base != tca and f in mb
    getfctx = _makegetfctx(ctx)

    # 'of' tracks the most recent name of f along the walked rename chain;
    # 'seen' holds every name already visited (f itself included) so that
    # loops in the history do not revisit a name.
    of = None
    seen = set([f])
    for oc in getfctx(f, m1[f]).ancestors():
        ocr = oc.linkrev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in m2:
            continue # no match, keep looking
        if m2[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getfctx(of, m2[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2, tca.rev())
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

        if of in mb:
            data['diverge'].setdefault(of, []).append(f)
612
665
def duplicatecopies(repo, rev, fromrev, skiprev=None):
    '''record in the dirstate of rev the copies made between fromrev and rev

    When skiprev is given, it names a revision used to filter copy
    records: any copy that already occurs between fromrev and skiprev is
    not duplicated, even if it also appears in the fromrev->rev set.
    '''
    filtered = {}
    if (skiprev is not None and
        not repo.ui.configbool('experimental', 'disablecopytrace')):
        # disablecopytrace only skips this call, not the whole function:
        # this line is O(size of the repo) during a rebase, while the rest
        # stays cheap and is still required to carry copy metadata across
        # the rebase.
        filtered = pathcopies(repo[fromrev], repo[skiprev])
    for dest, source in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dest might not
        # actually be in the dirstate; only record entries tracked there
        if dest not in filtered and repo.dirstate[dest] in "nma":
            repo.dirstate.copy(source, dest)
General Comments 0
You need to be logged in to leave comments. Login now