##// END OF EJS Templates
checkcopies: add an inline comment about the '_related' call...
Pierre-Yves David -
r30137:f85f9e06 default
parent child Browse files
Show More
@@ -1,559 +1,561 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import heapq
10 import heapq
11
11
12 from . import (
12 from . import (
13 node,
13 node,
14 pathutil,
14 pathutil,
15 scmutil,
15 scmutil,
16 util,
16 util,
17 )
17 )
18
18
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct
    descendent of the other, in which case we can return the minimum revnum
    of a and b. Returns None if no such revision exists.
    """

    # Approach: tag a and b with opposite "sides" and walk the revision
    # graph backwards (highest rev first) with a heap. A parent whose
    # children are all on one side inherits that side; a parent reached
    # from both sides is neutralized (side 0). We track how many revisions
    # might still belong to a single side, remember the lowest one-sided
    # revision seen, and stop once no one-sided revision remains pending.

    cl = repo.changelog
    working = len(cl)  # pseudo rev standing in for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    sideof = {a: -1, b: 1}
    heap = [-a, -b]
    heapq.heapify(heap)
    pending = len(heap)
    foundcommon = False
    lowest = working

    while pending:
        rev = -heapq.heappop(heap)
        if rev == working:
            # the working directory's parents come from the dirstate
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(rev)
        for par in parents:
            if par < 0:
                continue
            if par not in sideof:
                # first visit: the parent inherits this child's side
                sideof[par] = sideof[rev]
                if sideof[par]:
                    pending += 1
                heapq.heappush(heap, -par)
            elif sideof[par] and sideof[par] != sideof[rev]:
                # reached from both sides: no longer exclusive to either
                sideof[par] = 0
                pending -= 1
                foundcommon = True
        if sideof[rev]:
            lowest = rev  # lowest one-sided rev visited so far
            pending -= 1

    if not foundcommon:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it
    #    'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @ 3 temporary amend commit for a1-amend
    # |
    # o 2 a1-amend
    # |
    # | o 1 a1
    # |/
    # o 0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be
    # 2, yet the filelog has the copy information in rev 1 and we will not
    # look back far enough unless we also look at a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(lowest, a, b)
103
103
104 def _chain(src, dst, a, b):
104 def _chain(src, dst, a, b):
105 '''chain two sets of copies a->b'''
105 '''chain two sets of copies a->b'''
106 t = a.copy()
106 t = a.copy()
107 for k, v in b.iteritems():
107 for k, v in b.iteritems():
108 if v in t:
108 if v in t:
109 # found a chain
109 # found a chain
110 if t[v] != k:
110 if t[v] != k:
111 # file wasn't renamed back to itself
111 # file wasn't renamed back to itself
112 t[k] = t[v]
112 t[k] = t[v]
113 if v not in dst:
113 if v not in dst:
114 # chain was a rename, not a copy
114 # chain was a rename, not a copy
115 del t[v]
115 del t[v]
116 if v in src:
116 if v in src:
117 # file is a copy of an existing file
117 # file is a copy of an existing file
118 t[k] = v
118 t[k] = v
119
119
120 # remove criss-crossed copies
120 # remove criss-crossed copies
121 for k, v in t.items():
121 for k, v in t.items():
122 if k in src and v in dst:
122 if k in src and v in dst:
123 del t[k]
123 del t[k]
124
124
125 return t
125 return t
126
126
def _tracefile(fctx, am, limit=-1):
    '''return file context that is the ancestor of fctx present in ancestor
    manifest am, stopping after the first ancestor lower than limit'''

    for anc in fctx.ancestors():
        # an ancestor is "present" when the manifest records the same file
        # node under the same path
        if am.get(anc.path(), None) == anc.filenode():
            return anc
        if limit >= 0 and anc.linkrev() < limit and anc.rev() < limit:
            # past the cut-off on both linkrev and rev: stop looking
            return None
136
136
def _dirstatecopies(d):
    """Return the {dst: src} copy map recorded in the dirstate of context d.

    Entries whose destination file is not in state 'a' (added), 'n'
    (normal) or 'm' (merged) are dropped.
    """
    ds = d._repo.dirstate
    c = ds.copies().copy()
    # snapshot the keys: we delete from 'c' inside the loop, and mutating
    # a dict while iterating its live key view is an error on Python 3
    # (Python 2's list-returning keys() masked this)
    for k in list(c.keys()):
        if ds[k] not in 'anm':
            del c[k]
    return c
144
144
def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see
    what files _forwardcopies is about to process.
    """
    manifesta = a.manifest()
    manifestb = b.manifest()
    if match:
        # narrow both manifests before comparing them
        manifesta = manifesta.matches(match)
        manifestb = manifestb.matches(match)
    return manifestb.filesnotin(manifesta)
156
156
def _forwardcopies(a, b, match=None):
    '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

    # if b is the working directory, operate on its first parent and
    # remember the working context so dirstate copies can be chained on
    # at the end
    wctx = None
    if b.rev() is None:
        wctx = b
        b = wctx.p1()
        if a == b:
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(wctx)

    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        limit = -1
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    copies = {}

    # Computing the forward missing is quite expensive on large manifests,
    # since it compares the entire manifests. We can optimize it in the
    # common use case of computing what copies are in a commit versus its
    # parent (like during a rebase or histedit). Note, we exclude merge
    # commits from this optimization, since the ctx.files() for a merge
    # commit is not correct for this comparison.
    forwardmissingmatch = match
    if not match and b.p1() == a and b.p2().node() == node.nullid:
        forwardmissingmatch = scmutil.matchfiles(a._repo, b.files())
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # share one ancestry context across all traced files to keep linkrev
    # adjustment cheap
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for fname in missing:
        fctx = b[fname]
        fctx._ancestrycontext = ancestrycontext
        srcctx = _tracefile(fctx, am, limit)
        if srcctx:
            copies[fname] = srcctx.path()

    # combine copies from dirstate if necessary
    if wctx is not None:
        copies = _chain(a, wctx, copies, _dirstatecopies(wctx))

    return copies
205
205
def _backwardrenames(a, b):
    """Return {src@b: dst@a} renames going backwards from a to b."""
    if a._repo.ui.configbool('experimental', 'disablecopytrace'):
        return {}

    # Even though we're not taking copies into account, 1:n rename
    # situations can still exist (e.g. hg cp a b; hg mv a c). In those
    # cases we arbitrarily pick one of the renames.
    fwd = _forwardcopies(b, a)
    renames = {}
    for dest, source in sorted(fwd.iteritems()):
        if source in a:
            # the source still exists: this is a copy, not a rename
            continue
        renames[source] = dest
    return renames
221
221
def pathcopies(x, y, match=None):
    '''find {dst@y: src@x} copy mapping for directed compare'''
    if x == y or not x or not y:
        # identical or empty endpoints: nothing can have been copied
        return {}
    base = y.ancestor(x)
    if base == x:
        # y descends from x: a plain forward walk suffices
        return _forwardcopies(x, y, match=match)
    if base == y:
        # x descends from y: trace renames backwards
        return _backwardrenames(x, y)
    # unrelated endpoints: go back to the common ancestor, then forward
    return _chain(x, y, _backwardrenames(x, base),
                  _forwardcopies(base, y, match=match))
233
233
def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):
    """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
    and c2. This is its own function so extensions can easily wrap this call
    to see what files mergecopies is about to process.

    Even though c1 and c2 are not used in this function, they are useful in
    other extensions for being able to read the file nodes of the changed
    files.
    """
    localonly = sorted(addedinm1 - addedinm2)
    otheronly = sorted(addedinm2 - addedinm1)

    if localonly:
        repo.ui.debug(" unmatched files in local:\n %s\n"
                      % "\n ".join(localonly))
    if otheronly:
        repo.ui.debug(" unmatched files in other:\n %s\n"
                      % "\n ".join(otheronly))
    return localonly, otheronly
252
252
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ancestry = getattr(ctx, '_ancestrycontext', None)
    if ancestry is None:
        # build (and cache on ctx) the ancestor set used to bound linkrev
        # adjustment; the working directory stands in for its parents
        if rev is None:
            revs = [p.rev() for p in ctx.parents()]
        else:
            revs = [rev]
        ancestry = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ancestry

    def makectx(fname, fnode):
        if len(fnode) != 20:  # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(fname)
            return repo[None][fname]
        fctx = repo.filectx(fname, fileid=fnode)
        # setup only needed for filectx not created from a changectx
        fctx._ancestrycontext = ancestry
        fctx._descendantrev = rev
        return fctx

    return util.lrucachefunc(makectx)
287
287
def mergecopies(repo, c1, c2, ca):
    """
    Find moves and copies between context c1 and c2 that are relevant
    for merging.

    Returns four dicts: "copy", "movewithdir", "diverge", and
    "renamedelete".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept
    # across a rebase.
    if repo.ui.configbool('experimental', 'disablecopytrace'):
        return {}, {}, {}, {}

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    ma = ca.manifest()

    # see _checkcopies documentation below for these dicts
    copy1, copy2 = {}, {}
    movewithdir1, movewithdir2 = {}, {}
    fullcopy1, fullcopy2 = {}, {}
    diverge = {}

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(ma)
    addedinm2 = m2.filesnotin(ma)
    u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
    u1u, u2u = u1r, u2r
    bothnew = sorted(addedinm1 & addedinm2)

    for fname in u1u:
        _checkcopies(c1, fname, m1, m2, ca, limit, diverge, copy1, fullcopy1)
    for fname in u2u:
        _checkcopies(c2, fname, m2, m1, ca, limit, diverge, copy2, fullcopy2)

    copy = dict(copy1.items() + copy2.items())
    movewithdir = dict(movewithdir1.items() + movewithdir2.items())
    fullcopy = dict(fullcopy1.items() + fullcopy2.items())

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    # items() gives us a snapshot list (Python 2), so deleting from
    # 'diverge' during the walk is safe here
    for source, dests in diverge.items():
        if len(dests) == 1 or source in c1 or source in c2:
            del diverge[source]  # not actually divergent, or not a rename
            if source not in c1 and source not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[source] = [d for d in dests
                                        if d in c1 or d in c2]
                renamedeleteset.update(dests)  # reverse map for below
        else:
            divergeset.update(dests)  # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge, _copy, _fullcopy = {}, {}, {}
    for fname in bothnew:
        _checkcopies(c1, fname, m1, m2, ca, limit,
                     bothdiverge, _copy, _fullcopy)
        _checkcopies(c2, fname, m2, m1, ca, limit,
                     bothdiverge, _copy, _fullcopy)
    for source, dests in bothdiverge.items():
        if len(dests) == 2 and dests[0] == dests[1]:
            # not actually divergent, just matching renames
            copy[dests[0]] = source

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for fname in sorted(fullcopy):
            note = ""
            if fname in copy:
                note += "*"
            if fname in divergeset:
                note += "!"
            if fname in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n"
                          % (fullcopy[fname], fname, note))
    del divergeset

    if not fullcopy:
        return copy, movewithdir, diverge, renamedelete

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dest, source in fullcopy.iteritems():
        srcdir = pathutil.dirname(source)
        dstdir = pathutil.dirname(dest)
        if srcdir in invalid:
            # already seen to be uninteresting
            continue
        elif srcdir in d1 and dstdir in d1:
            # directory wasn't entirely moved locally
            invalid.add(srcdir + "/")
        elif srcdir in d2 and dstdir in d2:
            # directory wasn't entirely moved remotely
            invalid.add(srcdir + "/")
        elif srcdir + "/" in dirmove and dirmove[srcdir + "/"] != dstdir + "/":
            # files from the same directory moved to two different places
            invalid.add(srcdir + "/")
        else:
            # looks good so far
            dirmove[srcdir + "/"] = dstdir + "/"

    for bad in invalid:
        if bad in dirmove:
            del dirmove[bad]
    del d1, d2, invalid

    if not dirmove:
        return copy, movewithdir, diverge, renamedelete

    for movedir in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n"
                      % (movedir, dirmove[movedir]))

    # check unaccounted nonoverlapping files against directory moves
    for fname in u1r + u2r:
        if fname not in fullcopy:
            for movedir in dirmove:
                if fname.startswith(movedir):
                    # new file added in a directory that was moved, move it
                    dest = dirmove[movedir] + fname[len(movedir):]
                    if dest not in copy:
                        movewithdir[fname] = dest
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (fname, dest))
                    break

    return copy, movewithdir, diverge, renamedelete
456
456
def _checkcopies(ctx, f, m1, m2, base, limit, diverge, copy, fullcopy):
    """
    check possible copies of f from m1 to m2

    ctx = starting context for f in m1
    f = the filename to check (as in m1)
    m1 = the source manifest
    m2 = the destination manifest
    base = the changectx used as a merge base
    limit = the rev number to not search beyond
    diverge = record all diverges in this dict
    copy = record all non-divergent copies in this dict
    fullcopy = record all copies in this dict

    note: limit is only an optimization, and there is no guarantee that
    irrelevant revisions will not be limited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    mb = base.manifest()
    getfctx = _makegetfctx(ctx)

    def _related(f1, f2, limit):
        # Walk back to common ancestor to see if the two files originate
        # from the same file. Since workingfilectx's rev() is None it messes
        # up the integer comparison logic, hence the pre-step check for
        # None (f1 and f2 can only be workingfilectx's initially).

        if f1 == f2:
            return f1 # a match

        g1, g2 = f1.ancestors(), f2.ancestors()
        try:
            f1r, f2r = f1.linkrev(), f2.linkrev()

            if f1r is None:
                f1 = next(g1)
            if f2r is None:
                f2 = next(g2)

            while True:
                f1r, f2r = f1.linkrev(), f2.linkrev()
                if f1r > f2r:
                    f1 = next(g1)
                elif f2r > f1r:
                    f2 = next(g2)
                elif f1 == f2:
                    return f1 # a match
                elif f1r == f2r or f1r < limit or f2r < limit:
                    return False # copy no longer relevant
        except StopIteration:
            # ran out of ancestors on one side without finding a common one
            return False

    of = None
    seen = set([f])
    # walk f's rename history backwards, collecting every name it was
    # known under along the way
    for oc in getfctx(f, m1[f]).ancestors():
        ocr = oc.linkrev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        fullcopy[f] = of # remember for dir rename detection
        if of not in m2:
            continue # no match, keep looking
        if m2[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getfctx(of, m2[of])
        # c2 might be a plain new file added on the destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2, base.rev())
        if cr and (of == f or of == c2.path()): # non-divergent
            copy[f] = of
            return

    # 'of' still names something in the merge base: both sides may have
    # renamed it independently, so record the potential divergence
    if of in mb:
        diverge.setdefault(of, []).append(f)
def duplicatecopies(repo, rev, fromrev, skiprev=None):
    '''reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    '''
    exclude = {}
    if (skiprev is not None and
        not repo.ui.configbool('experimental', 'disablecopytrace')):
        # disablecopytrace skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst in exclude:
            continue
        if repo.dirstate[dst] in "nma":
            repo.dirstate.copy(src, dst)
General Comments 0
You need to be logged in to leave comments. Login now