upstream/mercurial-mirror Commit - r16168:7bbabfe2

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

import util

8

import util

9

import heapq

9

import heapq

10

11

def _nonoverlap(d1, d2, d3):

11

def _nonoverlap(d1, d2, d3):

12

"Return list of elements in d1 not in d2 or d3"

12

"Return list of elements in d1 not in d2 or d3"

13

return sorted([d for d in d1 if d not in d3 and d not in d2])

13

return sorted([d for d in d1 if d not in d3 and d not in d2])

14

15

def _dirname(f):

15

def _dirname(f):

16

s = f.rfind("/")

16

s = f.rfind("/")

17

if s == -1:

17

if s == -1:

18

return ""

18

return ""

19

return f[:s]

19

return f[:s]

20

21

def _dirs(files):

21

def _dirs(files):

22

d = set()

22

d = set()

23

for f in files:

23

for f in files:

24

f = _dirname(f)

24

f = _dirname(f)

25

while f not in d:

25

while f not in d:

26

d.add(f)

26

d.add(f)

27

f = _dirname(f)

27

f = _dirname(f)

28

return d

28

return d

29

30

def _findlimit(repo, a, b):

30

def _findlimit(repo, a, b):

31

"""Find the earliest revision that's an ancestor of a or b but not both,

31

"""Find the earliest revision that's an ancestor of a or b but not both,

32

None if no such revision exists.

32

None if no such revision exists.

33

"""

33

"""

34

# basic idea:

34

# basic idea:

35

# - mark a and b with different sides

35

# - mark a and b with different sides

36

# - if a parent's children are all on the same side, the parent is

36

# - if a parent's children are all on the same side, the parent is

37

# on that side, otherwise it is on no side

37

# on that side, otherwise it is on no side

38

# - walk the graph in topological order with the help of a heap;

38

# - walk the graph in topological order with the help of a heap;

39

# - add unseen parents to side map

39

# - add unseen parents to side map

40

# - clear side of any parent that has children on different sides

40

# - clear side of any parent that has children on different sides

41

# - track number of interesting revs that might still be on a side

41

# - track number of interesting revs that might still be on a side

42

# - track the lowest interesting rev seen

42

# - track the lowest interesting rev seen

43

# - quit when interesting revs is zero

43

# - quit when interesting revs is zero

44

45

cl = repo.changelog

45

cl = repo.changelog

46

working = len(cl) # pseudo rev for the working directory

46

working = len(cl) # pseudo rev for the working directory

47

if a is None:

47

if a is None:

48

a = working

48

a = working

49

if b is None:

49

if b is None:

50

b = working

50

b = working

51

52

side = {a: -1, b: 1}

52

side = {a: -1, b: 1}

53

visit = [-a, -b]

53

visit = [-a, -b]

54

heapq.heapify(visit)

54

heapq.heapify(visit)

55

interesting = len(visit)

55

interesting = len(visit)

56

hascommonancestor = False

56

hascommonancestor = False

57

limit = working

57

limit = working

58

59

while interesting:

59

while interesting:

60

r = -heapq.heappop(visit)

60

r = -heapq.heappop(visit)

61

if r == working:

61

if r == working:

62

parents = [cl.rev(p) for p in repo.dirstate.parents()]

62

parents = [cl.rev(p) for p in repo.dirstate.parents()]

63

else:

63

else:

64

parents = cl.parentrevs(r)

64

parents = cl.parentrevs(r)

65

for p in parents:

65

for p in parents:

66

if p < 0:

66

if p < 0:

67

continue

67

continue

68

if p not in side:

68

if p not in side:

69

# first time we see p; add it to visit

69

# first time we see p; add it to visit

70

side[p] = side[r]

70

side[p] = side[r]

71

if side[p]:

71

if side[p]:

72

interesting += 1

72

interesting += 1

73

heapq.heappush(visit, -p)

73

heapq.heappush(visit, -p)

74

elif side[p] and side[p] != side[r]:

74

elif side[p] and side[p] != side[r]:

75

# p was interesting but now we know better

75

# p was interesting but now we know better

76

side[p] = 0

76

side[p] = 0

77

interesting -= 1

77

interesting -= 1

78

hascommonancestor = True

78

hascommonancestor = True

79

if side[r]:

79

if side[r]:

80

limit = r # lowest rev visited

80

limit = r # lowest rev visited

81

interesting -= 1

81

interesting -= 1

82

83

if not hascommonancestor:

83

if not hascommonancestor:

84

return None

84

return None

85

return limit

85

return limit

86

87

def _chain(src, dst, a, b):

87

def _chain(src, dst, a, b):

88

'''chain two sets of copies a->b'''

88

'''chain two sets of copies a->b'''

89

t = a.copy()

89

t = a.copy()

90

for k, v in b.iteritems():

90

for k, v in b.iteritems():

91

if v in t:

91

if v in t:

92

# found a chain

92

# found a chain

93

if t[v] != k:

93

if t[v] != k:

94

# file wasn't renamed back to itself

94

# file wasn't renamed back to itself

95

t[k] = t[v]

95

t[k] = t[v]

96

if v not in dst:

96

if v not in dst:

97

# chain was a rename, not a copy

97

# chain was a rename, not a copy

98

del t[v]

98

del t[v]

99

if v in src:

99

if v in src:

100

# file is a copy of an existing file

100

# file is a copy of an existing file

101

t[k] = v

101

t[k] = v

102

103

# remove criss-crossed copies

103

# remove criss-crossed copies

104

for k, v in t.items():

104

for k, v in t.items():

105

if k in src and v in dst:

105

if k in src and v in dst:

106

del t[k]

106

del t[k]

107

108

return t

108

return t

109

110

def _tracefile(fctx, actx):

110

def _tracefile(fctx, actx):

111

'''return file context that is the ancestor of fctx present in actx'''

111

'''return file context that is the ancestor of fctx present in actx'''

112

stop = actx.rev()

112

stop = actx.rev()

113

am = actx.manifest()

113

am = actx.manifest()

114

115

for f in fctx.ancestors():

115

for f in fctx.ancestors():

116

if am.get(f.path(), None) == f.filenode():

116

if am.get(f.path(), None) == f.filenode():

117

return f

117

return f

118

if f.rev() < stop:

118

if f.rev() < stop:

119

return None

119

return None

120

121

def _dirstatecopies(d):

121

def _dirstatecopies(d):

122

ds = d._repo.dirstate

122

ds = d._repo.dirstate

123

c = ds.copies().copy()

123

c = ds.copies().copy()

124

for k in c.keys():

124

for k in c.keys():

125

if ds[k] not in 'anm':

125

if ds[k] not in 'anm':

126

del c[k]

126

del c[k]

127

return c

127

return c

128

129

def _forwardcopies(a, b):

129

def _forwardcopies(a, b):

130

'''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

130

'''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

131

132

# check for working copy

132

# check for working copy

133

w = None

133

w = None

134

if b.rev() is None:

134

if b.rev() is None:

135

w = b

135

w = b

136

b = w.p1()

136

b = w.p1()

137

if a == b:

137

if a == b:

138

# short-circuit to avoid issues with merge states

138

# short-circuit to avoid issues with merge states

139

return _dirstatecopies(w)

139

return _dirstatecopies(w)

140

141

# find where new files came from

141

# find where new files came from

142

# we currently don't try to find where old files went, too expensive

142

# we currently don't try to find where old files went, too expensive

143

# this means we can miss a case like 'hg rm b; hg cp a b'

143

# this means we can miss a case like 'hg rm b; hg cp a b'

144

cm = {}

144

cm = {}

145

for f in b:

145

for f in b:

146

if f not in a:

146

if f not in a:

147

ofctx = _tracefile(b[f], a)

147

ofctx = _tracefile(b[f], a)

148

if ofctx:

148

if ofctx:

149

cm[f] = ofctx.path()

149

cm[f] = ofctx.path()

150

151

# combine copies from dirstate if necessary

151

# combine copies from dirstate if necessary

152

if w is not None:

152

if w is not None:

153

cm = _chain(a, w, cm, _dirstatecopies(w))

153

cm = _chain(a, w, cm, _dirstatecopies(w))

154

155

return cm

155

return cm

156

157

def _backwardcopies(a, b):

157

def _backwardcopies(a, b):

158

# because the forward mapping is 1:n, we can lose renames here

158

# because the forward mapping is 1:n, we can lose renames here

159

# in particular, we find renames better than copies

159

# in particular, we find renames better than copies

160

f = _forwardcopies(b, a)

160

f = _forwardcopies(b, a)

161

r = {}

161

r = {}

162

for k, v in f.iteritems():

162

for k, v in f.iteritems():

163

r[v] = k

163

r[v] = k

164

return r

164

return r

165

166

def pathcopies(x, y):

166

def pathcopies(x, y):

167

'''find {dst@y: src@x} copy mapping for directed compare'''

167

'''find {dst@y: src@x} copy mapping for directed compare'''

168

if x == y or not x or not y:

168

if x == y or not x or not y:

169

return {}

169

return {}

170

a = y.ancestor(x)

170

a = y.ancestor(x)

171

if a == x:

171

if a == x:

172

return _forwardcopies(x, y)

172

return _forwardcopies(x, y)

173

if a == y:

173

if a == y:

174

return _backwardcopies(x, y)

174

return _backwardcopies(x, y)

175

return _chain(x, y, _backwardcopies(x, a), _forwardcopies(a, y))

175

return _chain(x, y, _backwardcopies(x, a), _forwardcopies(a, y))

176

177

def mergecopies(repo, c1, c2, ca, checkdirs=True):

177

def mergecopies(repo, c1, c2, ca, checkdirs=True):

178

"""

178

"""

179

Find moves and copies between context c1 and c2

179

Find moves and copies between context c1 and c2 that are relevant

180

for merging.

181

182

Returns two dicts, "copy" and "diverge".

183

184

"copy" is a mapping from source name -> destination name,

185

where source is in c1 and destination is in c2 or vice-versa.

186

187

"diverge" is a mapping of source name -> list of destination names

188

for divergent renames.

180

"""

189

"""

181

# avoid silly behavior for update from empty dir

190

# avoid silly behavior for update from empty dir

182

if not c1 or not c2 or c1 == c2:

191

if not c1 or not c2 or c1 == c2:

183

return {}, {}

192

return {}, {}

184

193

185

# avoid silly behavior for parent -> working dir

194

# avoid silly behavior for parent -> working dir

186

if c2.node() is None and c1.node() == repo.dirstate.p1():

195

if c2.node() is None and c1.node() == repo.dirstate.p1():

187

return repo.dirstate.copies(), {}

196

return repo.dirstate.copies(), {}

188

197

189

limit = _findlimit(repo, c1.rev(), c2.rev())

198

limit = _findlimit(repo, c1.rev(), c2.rev())

190

if limit is None:

199

if limit is None:

191

# no common ancestor, no copies

200

# no common ancestor, no copies

192

return {}, {}

201

return {}, {}

193

m1 = c1.manifest()

202

m1 = c1.manifest()

194

m2 = c2.manifest()

203

m2 = c2.manifest()

195

ma = ca.manifest()

204

ma = ca.manifest()

196

205

197

def makectx(f, n):

206

def makectx(f, n):

198

if len(n) != 20: # in a working context?

207

if len(n) != 20: # in a working context?

199

if c1.rev() is None:

208

if c1.rev() is None:

200

return c1.filectx(f)

209

return c1.filectx(f)

201

return c2.filectx(f)

210

return c2.filectx(f)

202

return repo.filectx(f, fileid=n)

211

return repo.filectx(f, fileid=n)

203

212

204

ctx = util.lrucachefunc(makectx)

213

ctx = util.lrucachefunc(makectx)

205

copy = {}

214

copy = {}

206

fullcopy = {}

215

fullcopy = {}

207

diverge = {}

216

diverge = {}

208

217

209

def related(f1, f2, limit):

218

def related(f1, f2, limit):

210

# Walk back to common ancestor to see if the two files originate

219

# Walk back to common ancestor to see if the two files originate

211

# from the same file. Since workingfilectx's rev() is None it messes

220

# from the same file. Since workingfilectx's rev() is None it messes

212

# up the integer comparison logic, hence the pre-step check for

221

# up the integer comparison logic, hence the pre-step check for

213

# None (f1 and f2 can only be workingfilectx's initially).

222

# None (f1 and f2 can only be workingfilectx's initially).

214

223

215

if f1 == f2:

224

if f1 == f2:

216

return f1 # a match

225

return f1 # a match

217

226

218

g1, g2 = f1.ancestors(), f2.ancestors()

227

g1, g2 = f1.ancestors(), f2.ancestors()

219

try:

228

try:

220

f1r, f2r = f1.rev(), f2.rev()

229

f1r, f2r = f1.rev(), f2.rev()

221

230

222

if f1r is None:

231

if f1r is None:

223

f1 = g1.next()

232

f1 = g1.next()

224

if f2r is None:

233

if f2r is None:

225

f2 = g2.next()

234

f2 = g2.next()

226

235

227

while True:

236

while True:

228

f1r, f2r = f1.rev(), f2.rev()

237

f1r, f2r = f1.rev(), f2.rev()

229

if f1r > f2r:

238

if f1r > f2r:

230

f1 = g1.next()

239

f1 = g1.next()

231

elif f2r > f1r:

240

elif f2r > f1r:

232

f2 = g2.next()

241

f2 = g2.next()

233

elif f1 == f2:

242

elif f1 == f2:

234

return f1 # a match

243

return f1 # a match

235

elif f1r == f2r or f1r < limit or f2r < limit:

244

elif f1r == f2r or f1r < limit or f2r < limit:

236

return False # copy no longer relevant

245

return False # copy no longer relevant

237

except StopIteration:

246

except StopIteration:

238

return False

247

return False

239

248

240

def checkcopies(f, m1, m2):

249

def checkcopies(f, m1, m2):

241

'''check possible copies of f from m1 to m2'''

250

'''check possible copies of f from m1 to m2'''

242

of = None

251

of = None

243

seen = set([f])

252

seen = set([f])

244

for oc in ctx(f, m1[f]).ancestors():

253

for oc in ctx(f, m1[f]).ancestors():

245

ocr = oc.rev()

254

ocr = oc.rev()

246

of = oc.path()

255

of = oc.path()

247

if of in seen:

256

if of in seen:

248

# check limit late - grab last rename before

257

# check limit late - grab last rename before

249

if ocr < limit:

258

if ocr < limit:

250

break

259

break

251

continue

260

continue

252

seen.add(of)

261

seen.add(of)

253

262

254

fullcopy[f] = of # remember for dir rename detection

263

fullcopy[f] = of # remember for dir rename detection

255

if of not in m2:

264

if of not in m2:

256

continue # no match, keep looking

265

continue # no match, keep looking

257

if m2[of] == ma.get(of):

266

if m2[of] == ma.get(of):

258

break # no merge needed, quit early

267

break # no merge needed, quit early

259

c2 = ctx(of, m2[of])

268

c2 = ctx(of, m2[of])

260

cr = related(oc, c2, ca.rev())

269

cr = related(oc, c2, ca.rev())

261

if cr and (of == f or of == c2.path()): # non-divergent

270

if cr and (of == f or of == c2.path()): # non-divergent

262

copy[f] = of

271

copy[f] = of

263

of = None

272

of = None

264

break

273

break

265

274

266

if of in ma:

275

if of in ma:

267

diverge.setdefault(of, []).append(f)

276

diverge.setdefault(of, []).append(f)

268

277

269

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

278

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

270

279

271

u1 = _nonoverlap(m1, m2, ma)

280

u1 = _nonoverlap(m1, m2, ma)

272

u2 = _nonoverlap(m2, m1, ma)

281

u2 = _nonoverlap(m2, m1, ma)

273

282

274

if u1:

283

if u1:

275

repo.ui.debug(" unmatched files in local:\n %s\n"

284

repo.ui.debug(" unmatched files in local:\n %s\n"

276

% "\n ".join(u1))

285

% "\n ".join(u1))

277

if u2:

286

if u2:

278

repo.ui.debug(" unmatched files in other:\n %s\n"

287

repo.ui.debug(" unmatched files in other:\n %s\n"

279

% "\n ".join(u2))

288

% "\n ".join(u2))

280

289

281

for f in u1:

290

for f in u1:

282

checkcopies(f, m1, m2)

291

checkcopies(f, m1, m2)

283

for f in u2:

292

for f in u2:

284

checkcopies(f, m2, m1)

293

checkcopies(f, m2, m1)

285

294

286

diverge2 = set()

295

diverge2 = set()

287

for of, fl in diverge.items():

296

for of, fl in diverge.items():

288

if len(fl) == 1 or of in c2:

297

if len(fl) == 1 or of in c2:

289

del diverge[of] # not actually divergent, or not a rename

298

del diverge[of] # not actually divergent, or not a rename

290

else:

299

else:

291

diverge2.update(fl) # reverse map for below

300

diverge2.update(fl) # reverse map for below

292

301

293

if fullcopy:

302

if fullcopy:

294

repo.ui.debug(" all copies found (* = to merge, ! = divergent):\n")

303

repo.ui.debug(" all copies found (* = to merge, ! = divergent):\n")

295

for f in fullcopy:

304

for f in fullcopy:

296

note = ""

305

note = ""

297

if f in copy:

306

if f in copy:

298

note += "*"

307

note += "*"

299

if f in diverge2:

308

if f in diverge2:

300

note += "!"

309

note += "!"

301

repo.ui.debug(" %s -> %s %s\n" % (f, fullcopy[f], note))

310

repo.ui.debug(" %s -> %s %s\n" % (f, fullcopy[f], note))

302

del diverge2

311

del diverge2

303

312

304

if not fullcopy or not checkdirs:

313

if not fullcopy or not checkdirs:

305

return copy, diverge

314

return copy, diverge

306

315

307

repo.ui.debug(" checking for directory renames\n")

316

repo.ui.debug(" checking for directory renames\n")

308

317

309

# generate a directory move map

318

# generate a directory move map

310

d1, d2 = _dirs(m1), _dirs(m2)

319

d1, d2 = _dirs(m1), _dirs(m2)

311

invalid = set()

320

invalid = set()

312

dirmove = {}

321

dirmove = {}

313

322

314

# examine each file copy for a potential directory move, which is

323

# examine each file copy for a potential directory move, which is

315

# when all the files in a directory are moved to a new directory

324

# when all the files in a directory are moved to a new directory

316

for dst, src in fullcopy.iteritems():

325

for dst, src in fullcopy.iteritems():

317

dsrc, ddst = _dirname(src), _dirname(dst)

326

dsrc, ddst = _dirname(src), _dirname(dst)

318

if dsrc in invalid:

327

if dsrc in invalid:

319

# already seen to be uninteresting

328

# already seen to be uninteresting

320

continue

329

continue

321

elif dsrc in d1 and ddst in d1:

330

elif dsrc in d1 and ddst in d1:

322

# directory wasn't entirely moved locally

331

# directory wasn't entirely moved locally

323

invalid.add(dsrc)

332

invalid.add(dsrc)

324

elif dsrc in d2 and ddst in d2:

333

elif dsrc in d2 and ddst in d2:

325

# directory wasn't entirely moved remotely

334

# directory wasn't entirely moved remotely

326

invalid.add(dsrc)

335

invalid.add(dsrc)

327

elif dsrc in dirmove and dirmove[dsrc] != ddst:

336

elif dsrc in dirmove and dirmove[dsrc] != ddst:

328

# files from the same directory moved to two different places

337

# files from the same directory moved to two different places

329

invalid.add(dsrc)

338

invalid.add(dsrc)

330

else:

339

else:

331

# looks good so far

340

# looks good so far

332

dirmove[dsrc + "/"] = ddst + "/"

341

dirmove[dsrc + "/"] = ddst + "/"

333

342

334

for i in invalid:

343

for i in invalid:

335

if i in dirmove:

344

if i in dirmove:

336

del dirmove[i]

345

del dirmove[i]

337

del d1, d2, invalid

346

del d1, d2, invalid

338

347

339

if not dirmove:

348

if not dirmove:

340

return copy, diverge

349

return copy, diverge

341

350

342

for d in dirmove:

351

for d in dirmove:

343

repo.ui.debug(" dir %s -> %s\n" % (d, dirmove[d]))

352

repo.ui.debug(" dir %s -> %s\n" % (d, dirmove[d]))

344

353

345

# check unaccounted nonoverlapping files against directory moves

354

# check unaccounted nonoverlapping files against directory moves

346

for f in u1 + u2:

355

for f in u1 + u2:

347

if f not in fullcopy:

356

if f not in fullcopy:

348

for d in dirmove:

357

for d in dirmove:

349

if f.startswith(d):

358

if f.startswith(d):

350

# new file added in a directory that was moved, move it

359

# new file added in a directory that was moved, move it

351

df = dirmove[d] + f[len(d):]

360

df = dirmove[d] + f[len(d):]

352

if df not in copy:

361

if df not in copy:

353

copy[f] = df

362

copy[f] = df

354

repo.ui.debug(" file %s -> %s\n" % (f, copy[f]))

363

repo.ui.debug(" file %s -> %s\n" % (f, copy[f]))

355

break

364

break

356

365

357

return copy, diverge

366

return copy, diverge

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             import util
             import heapq
             def _nonoverlap(d1, d2, d3):
                 "Return list of elements in d1 not in d2 or d3"
                 return sorted([d for d in d1 if d not in d3 and d not in d2])
             def _dirname(f):
                 s = f.rfind("/")
                 if s == -1:
                     return ""
                 return f[:s]
             def _dirs(files):
                 d = set()
                 for f in files:
                     f = _dirname(f)
                     while f not in d:
                         d.add(f)
                         f = _dirname(f)
                 return d
             def _findlimit(repo, a, b):
                 """Find the earliest revision that's an ancestor of a or b but not both,
                 None if no such revision exists.
                 """
                 # basic idea:
                 # - mark a and b with different sides
                 # - if a parent's children are all on the same side, the parent is
                 #   on that side, otherwise it is on no side
                 # - walk the graph in topological order with the help of a heap;
                 #   - add unseen parents to side map
                 #   - clear side of any parent that has children on different sides
                 #   - track number of interesting revs that might still be on a side
                 #   - track the lowest interesting rev seen
                 #   - quit when interesting revs is zero
                 cl = repo.changelog
                 working = len(cl) # pseudo rev for the working directory
                 if a is None:
                     a = working
                 if b is None:
                     b = working
                 side = {a: -1, b: 1}
                 visit = [-a, -b]
                 heapq.heapify(visit)
                 interesting = len(visit)
                 hascommonancestor = False
                 limit = working
                 while interesting:
                     r = -heapq.heappop(visit)
                     if r == working:
                         parents = [cl.rev(p) for p in repo.dirstate.parents()]
                     else:
                         parents = cl.parentrevs(r)
                     for p in parents:
                         if p < 0:
                             continue
                         if p not in side:
                             # first time we see p; add it to visit
                             side[p] = side[r]
                             if side[p]:
                                 interesting += 1
                             heapq.heappush(visit, -p)
                         elif side[p] and side[p] != side[r]:
                             # p was interesting but now we know better
                             side[p] = 0
                             interesting -= 1
                             hascommonancestor = True
                     if side[r]:
                         limit = r # lowest rev visited
                         interesting -= 1
                 if not hascommonancestor:
                     return None
                 return limit
             def _chain(src, dst, a, b):
                 '''chain two sets of copies a->b'''
                 t = a.copy()
                 for k, v in b.iteritems():
                     if v in t:
                         # found a chain
                         if t[v] != k:
                             # file wasn't renamed back to itself
                             t[k] = t[v]
                         if v not in dst:
                             # chain was a rename, not a copy
                             del t[v]
                     if v in src:
                         # file is a copy of an existing file
                         t[k] = v
                 # remove criss-crossed copies
                 for k, v in t.items():
                     if k in src and v in dst:
                         del t[k]
                 return t
             def _tracefile(fctx, actx):
                 '''return file context that is the ancestor of fctx present in actx'''
                 stop = actx.rev()
                 am = actx.manifest()
                 for f in fctx.ancestors():
                     if am.get(f.path(), None) == f.filenode():
                         return f
                     if f.rev() < stop:
                         return None
             def _dirstatecopies(d):
                 ds = d._repo.dirstate
                 c = ds.copies().copy()
                 for k in c.keys():
                     if ds[k] not in 'anm':
                         del c[k]
                 return c
             def _forwardcopies(a, b):
                 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
                 # check for working copy
                 w = None
                 if b.rev() is None:
                     w = b
                     b = w.p1()
                     if a == b:
                         # short-circuit to avoid issues with merge states
                         return _dirstatecopies(w)
                 # find where new files came from
                 # we currently don't try to find where old files went, too expensive
                 # this means we can miss a case like 'hg rm b; hg cp a b'
                 cm = {}
                 for f in b:
                     if f not in a:
                         ofctx = _tracefile(b[f], a)
                         if ofctx:
                             cm[f] = ofctx.path()
                 # combine copies from dirstate if necessary
                 if w is not None:
                     cm = _chain(a, w, cm, _dirstatecopies(w))
                 return cm
             def _backwardcopies(a, b):
                 # because the forward mapping is 1:n, we can lose renames here
                 # in particular, we find renames better than copies
                 f = _forwardcopies(b, a)
                 r = {}
                 for k, v in f.iteritems():
                     r[v] = k
                 return r
             def pathcopies(x, y):
                 '''find {dst@y: src@x} copy mapping for directed compare'''
                 if x == y or not x or not y:
                     return {}
                 a = y.ancestor(x)
                 if a == x:
                     return _forwardcopies(x, y)
                 if a == y:
                     return _backwardcopies(x, y)
                 return _chain(x, y, _backwardcopies(x, a), _forwardcopies(a, y))
             def mergecopies(repo, c1, c2, ca, checkdirs=True):
                 """
-                Find moves and copies between context c1 and c2
+                Find moves and copies between context c1 and c2 that are relevant
+                for merging.
+                Returns two dicts, "copy" and "diverge".
+                "copy" is a mapping from source name -> destination name,
+                where source is in c1 and destination is in c2 or vice-versa.
+                "diverge" is a mapping of source name -> list of destination names
+                for divergent renames.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return {}, {}
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return repo.dirstate.copies(), {}
                 limit = _findlimit(repo, c1.rev(), c2.rev())
                 if limit is None:
                     # no common ancestor, no copies
                     return {}, {}
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 ma = ca.manifest()
                 def makectx(f, n):
                     if len(n) != 20: # in a working context?
                         if c1.rev() is None:
                             return c1.filectx(f)
                         return c2.filectx(f)
                     return repo.filectx(f, fileid=n)
                 ctx = util.lrucachefunc(makectx)
                 copy = {}
                 fullcopy = {}
                 diverge = {}
                 def related(f1, f2, limit):
                     # Walk back to common ancestor to see if the two files originate
                     # from the same file. Since workingfilectx's rev() is None it messes
                     # up the integer comparison logic, hence the pre-step check for
                     # None (f1 and f2 can only be workingfilectx's initially).
                     if f1 == f2:
                         return f1 # a match
                     g1, g2 = f1.ancestors(), f2.ancestors()
                     try:
                         f1r, f2r = f1.rev(), f2.rev()
                         if f1r is None:
                             f1 = g1.next()
                         if f2r is None:
                             f2 = g2.next()
                         while True:
                             f1r, f2r = f1.rev(), f2.rev()
                             if f1r > f2r:
                                 f1 = g1.next()
                             elif f2r > f1r:
                                 f2 = g2.next()
                             elif f1 == f2:
                                 return f1 # a match
                             elif f1r == f2r or f1r < limit or f2r < limit:
                                 return False # copy no longer relevant
                     except StopIteration:
                         return False
                 def checkcopies(f, m1, m2):
                     '''check possible copies of f from m1 to m2'''
                     of = None
                     seen = set([f])
                     for oc in ctx(f, m1[f]).ancestors():
                         ocr = oc.rev()
                         of = oc.path()
                         if of in seen:
                             # check limit late - grab last rename before
                             if ocr < limit:
                                 break
                             continue
                         seen.add(of)
                         fullcopy[f] = of # remember for dir rename detection
                         if of not in m2:
                             continue # no match, keep looking
                         if m2[of] == ma.get(of):
                             break # no merge needed, quit early
                         c2 = ctx(of, m2[of])
                         cr = related(oc, c2, ca.rev())
                         if cr and (of == f or of == c2.path()): # non-divergent
                             copy[f] = of
                             of = None
                             break
                     if of in ma:
                         diverge.setdefault(of, []).append(f)
                 repo.ui.debug("  searching for copies back to rev %d\n" % limit)
                 u1 = _nonoverlap(m1, m2, ma)
                 u2 = _nonoverlap(m2, m1, ma)
                 if u1:
                     repo.ui.debug("  unmatched files in local:\n   %s\n"
                                   % "\n   ".join(u1))
                 if u2:
                     repo.ui.debug("  unmatched files in other:\n   %s\n"
                                   % "\n   ".join(u2))
                 for f in u1:
                     checkcopies(f, m1, m2)
                 for f in u2:
                     checkcopies(f, m2, m1)
                 diverge2 = set()
                 for of, fl in diverge.items():
                     if len(fl) == 1 or of in c2:
                         del diverge[of] # not actually divergent, or not a rename
                     else:
                         diverge2.update(fl) # reverse map for below
                 if fullcopy:
                     repo.ui.debug("  all copies found (* = to merge, ! = divergent):\n")
                     for f in fullcopy:
                         note = ""
                         if f in copy:
                             note += "*"
                         if f in diverge2:
                             note += "!"
                         repo.ui.debug("   %s -> %s %s\n" % (f, fullcopy[f], note))
                 del diverge2
                 if not fullcopy or not checkdirs:
                     return copy, diverge
                 repo.ui.debug("  checking for directory renames\n")
                 # generate a directory move map
                 d1, d2 = _dirs(m1), _dirs(m2)
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in fullcopy.iteritems():
                     dsrc, ddst = _dirname(src), _dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif dsrc in d1 and ddst in d1:
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc)
                     elif dsrc in d2 and ddst in d2:
                         # directory wasn't entirely moved remotely
                         invalid.add(dsrc)
                     elif dsrc in dirmove and dirmove[dsrc] != ddst:
                         # files from the same directory moved to two different places
                         invalid.add(dsrc)
                     else:
                         # looks good so far
                         dirmove[dsrc + "/"] = ddst + "/"
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del d1, d2, invalid
                 if not dirmove:
                     return copy, diverge
                 for d in dirmove:
                     repo.ui.debug("  dir %s -> %s\n" % (d, dirmove[d]))
                 # check unaccounted nonoverlapping files against directory moves
                 for f in u1 + u2:
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d):]
                                 if df not in copy:
                                     copy[f] = df
                                     repo.ui.debug("  file %s -> %s\n" % (f, copy[f]))
                                 break
                 return copy, diverge