upstream/mercurial-mirror Commit - r18878:3cfaace0

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

import util

8

import util

9

import heapq

9

import heapq

10

11

def _nonoverlap(d1, d2, d3):

11

def _nonoverlap(d1, d2, d3):

12

"Return list of elements in d1 not in d2 or d3"

12

"Return list of elements in d1 not in d2 or d3"

13

return sorted([d for d in d1 if d not in d3 and d not in d2])

13

return sorted([d for d in d1 if d not in d3 and d not in d2])

14

15

def _dirname(f):

15

def _dirname(f):

16

s = f.rfind("/")

16

s = f.rfind("/")

17

if s == -1:

17

if s == -1:

18

return ""

18

return ""

19

return f[:s]

19

return f[:s]

20

21

def _findlimit(repo, a, b):

21

def _findlimit(repo, a, b):

22

"""Find the earliest revision that's an ancestor of a or b but not both,

22

"""Find the earliest revision that's an ancestor of a or b but not both,

23

None if no such revision exists.

23

None if no such revision exists.

24

"""

24

"""

25

# basic idea:

25

# basic idea:

26

# - mark a and b with different sides

26

# - mark a and b with different sides

27

# - if a parent's children are all on the same side, the parent is

27

# - if a parent's children are all on the same side, the parent is

28

# on that side, otherwise it is on no side

28

# on that side, otherwise it is on no side

29

# - walk the graph in topological order with the help of a heap;

29

# - walk the graph in topological order with the help of a heap;

30

# - add unseen parents to side map

30

# - add unseen parents to side map

31

# - clear side of any parent that has children on different sides

31

# - clear side of any parent that has children on different sides

32

# - track number of interesting revs that might still be on a side

32

# - track number of interesting revs that might still be on a side

33

# - track the lowest interesting rev seen

33

# - track the lowest interesting rev seen

34

# - quit when interesting revs is zero

34

# - quit when interesting revs is zero

35

36

cl = repo.changelog

36

cl = repo.changelog

37

working = len(cl) # pseudo rev for the working directory

37

working = len(cl) # pseudo rev for the working directory

38

if a is None:

38

if a is None:

39

a = working

39

a = working

40

if b is None:

40

if b is None:

41

b = working

41

b = working

42

43

side = {a: -1, b: 1}

43

side = {a: -1, b: 1}

44

visit = [-a, -b]

44

visit = [-a, -b]

45

heapq.heapify(visit)

45

heapq.heapify(visit)

46

interesting = len(visit)

46

interesting = len(visit)

47

hascommonancestor = False

47

hascommonancestor = False

48

limit = working

48

limit = working

49

50

while interesting:

50

while interesting:

51

r = -heapq.heappop(visit)

51

r = -heapq.heappop(visit)

52

if r == working:

52

if r == working:

53

parents = [cl.rev(p) for p in repo.dirstate.parents()]

53

parents = [cl.rev(p) for p in repo.dirstate.parents()]

54

else:

54

else:

55

parents = cl.parentrevs(r)

55

parents = cl.parentrevs(r)

56

for p in parents:

56

for p in parents:

57

if p < 0:

57

if p < 0:

58

continue

58

continue

59

if p not in side:

59

if p not in side:

60

# first time we see p; add it to visit

60

# first time we see p; add it to visit

61

side[p] = side[r]

61

side[p] = side[r]

62

if side[p]:

62

if side[p]:

63

interesting += 1

63

interesting += 1

64

heapq.heappush(visit, -p)

64

heapq.heappush(visit, -p)

65

elif side[p] and side[p] != side[r]:

65

elif side[p] and side[p] != side[r]:

66

# p was interesting but now we know better

66

# p was interesting but now we know better

67

side[p] = 0

67

side[p] = 0

68

interesting -= 1

68

interesting -= 1

69

hascommonancestor = True

69

hascommonancestor = True

70

if side[r]:

70

if side[r]:

71

limit = r # lowest rev visited

71

limit = r # lowest rev visited

72

interesting -= 1

72

interesting -= 1

73

74

if not hascommonancestor:

74

if not hascommonancestor:

75

return None

75

return None

76

return limit

76

return limit

77

78

def _chain(src, dst, a, b):

78

def _chain(src, dst, a, b):

79

'''chain two sets of copies a->b'''

79

'''chain two sets of copies a->b'''

80

t = a.copy()

80

t = a.copy()

81

for k, v in b.iteritems():

81

for k, v in b.iteritems():

82

if v in t:

82

if v in t:

83

# found a chain

83

# found a chain

84

if t[v] != k:

84

if t[v] != k:

85

# file wasn't renamed back to itself

85

# file wasn't renamed back to itself

86

t[k] = t[v]

86

t[k] = t[v]

87

if v not in dst:

87

if v not in dst:

88

# chain was a rename, not a copy

88

# chain was a rename, not a copy

89

del t[v]

89

del t[v]

90

if v in src:

90

if v in src:

91

# file is a copy of an existing file

91

# file is a copy of an existing file

92

t[k] = v

92

t[k] = v

93

94

# remove criss-crossed copies

94

# remove criss-crossed copies

95

for k, v in t.items():

95

for k, v in t.items():

96

if k in src and v in dst:

96

if k in src and v in dst:

97

del t[k]

97

del t[k]

98

99

return t

99

return t

100

101

def _tracefile(fctx, actx):

101

def _tracefile(fctx, actx):

102

'''return file context that is the ancestor of fctx present in actx'''

102

'''return file context that is the ancestor of fctx present in actx'''

103

stop = actx.rev()

103

stop = actx.rev()

104

am = actx.manifest()

104

am = actx.manifest()

105

106

for f in fctx.ancestors():

106

for f in fctx.ancestors():

107

if am.get(f.path(), None) == f.filenode():

107

if am.get(f.path(), None) == f.filenode():

108

return f

108

return f

109

if f.rev() < stop:

109

if f.rev() < stop:

110

return None

110

return None

111

112

def _dirstatecopies(d):

112

def _dirstatecopies(d):

113

ds = d._repo.dirstate

113

ds = d._repo.dirstate

114

c = ds.copies().copy()

114

c = ds.copies().copy()

115

for k in c.keys():

115

for k in c.keys():

116

if ds[k] not in 'anm':

116

if ds[k] not in 'anm':

117

del c[k]

117

del c[k]

118

return c

118

return c

119

120

def _forwardcopies(a, b):

120

def _forwardcopies(a, b):

121

'''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

121

'''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

122

123

# check for working copy

123

# check for working copy

124

w = None

124

w = None

125

if b.rev() is None:

125

if b.rev() is None:

126

w = b

126

w = b

127

b = w.p1()

127

b = w.p1()

128

if a == b:

128

if a == b:

129

# short-circuit to avoid issues with merge states

129

# short-circuit to avoid issues with merge states

130

return _dirstatecopies(w)

130

return _dirstatecopies(w)

131

132

# find where new files came from

132

# find where new files came from

133

# we currently don't try to find where old files went, too expensive

133

# we currently don't try to find where old files went, too expensive

134

# this means we can miss a case like 'hg rm b; hg cp a b'

134

# this means we can miss a case like 'hg rm b; hg cp a b'

135

cm = {}

135

cm = {}

136

for f in b:

136

missing = set(b.manifest().iterkeys())

137

if f not in a:

137

missing.difference_update(a.manifest().iterkeys())

138

139

for f in missing:

138

ofctx = _tracefile(b[f], a)

140

ofctx = _tracefile(b[f], a)

139

if ofctx:

141

if ofctx:

140

cm[f] = ofctx.path()

142

cm[f] = ofctx.path()

141

143

142

# combine copies from dirstate if necessary

144

# combine copies from dirstate if necessary

143

if w is not None:

145

if w is not None:

144

cm = _chain(a, w, cm, _dirstatecopies(w))

146

cm = _chain(a, w, cm, _dirstatecopies(w))

145

147

146

return cm

148

return cm

147

149

148

def _backwardrenames(a, b):

150

def _backwardrenames(a, b):

149

# Even though we're not taking copies into account, 1:n rename situations

151

# Even though we're not taking copies into account, 1:n rename situations

150

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

152

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

151

# arbitrarily pick one of the renames.

153

# arbitrarily pick one of the renames.

152

f = _forwardcopies(b, a)

154

f = _forwardcopies(b, a)

153

r = {}

155

r = {}

154

for k, v in sorted(f.iteritems()):

156

for k, v in sorted(f.iteritems()):

155

# remove copies

157

# remove copies

156

if v in a:

158

if v in a:

157

continue

159

continue

158

r[v] = k

160

r[v] = k

159

return r

161

return r

160

162

161

def pathcopies(x, y):

163

def pathcopies(x, y):

162

'''find {dst@y: src@x} copy mapping for directed compare'''

164

'''find {dst@y: src@x} copy mapping for directed compare'''

163

if x == y or not x or not y:

165

if x == y or not x or not y:

164

return {}

166

return {}

165

a = y.ancestor(x)

167

a = y.ancestor(x)

166

if a == x:

168

if a == x:

167

return _forwardcopies(x, y)

169

return _forwardcopies(x, y)

168

if a == y:

170

if a == y:

169

return _backwardrenames(x, y)

171

return _backwardrenames(x, y)

170

return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))

172

return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))

171

173

172

def mergecopies(repo, c1, c2, ca):

174

def mergecopies(repo, c1, c2, ca):

173

"""

175

"""

174

Find moves and copies between context c1 and c2 that are relevant

176

Find moves and copies between context c1 and c2 that are relevant

175

for merging.

177

for merging.

176

178

177

Returns four dicts: "copy", "movewithdir", "diverge", and

179

Returns four dicts: "copy", "movewithdir", "diverge", and

178

"renamedelete".

180

"renamedelete".

179

181

180

"copy" is a mapping from destination name -> source name,

182

"copy" is a mapping from destination name -> source name,

181

where source is in c1 and destination is in c2 or vice-versa.

183

where source is in c1 and destination is in c2 or vice-versa.

182

184

183

"movewithdir" is a mapping from source name -> destination name,

185

"movewithdir" is a mapping from source name -> destination name,

184

where the file at source present in one context but not the other

186

where the file at source present in one context but not the other

185

needs to be moved to destination by the merge process, because the

187

needs to be moved to destination by the merge process, because the

186

other context moved the directory it is in.

188

other context moved the directory it is in.

187

189

188

"diverge" is a mapping of source name -> list of destination names

190

"diverge" is a mapping of source name -> list of destination names

189

for divergent renames.

191

for divergent renames.

190

192

191

"renamedelete" is a mapping of source name -> list of destination

193

"renamedelete" is a mapping of source name -> list of destination

192

names for files deleted in c1 that were renamed in c2 or vice-versa.

194

names for files deleted in c1 that were renamed in c2 or vice-versa.

193

"""

195

"""

194

# avoid silly behavior for update from empty dir

196

# avoid silly behavior for update from empty dir

195

if not c1 or not c2 or c1 == c2:

197

if not c1 or not c2 or c1 == c2:

196

return {}, {}, {}, {}

198

return {}, {}, {}, {}

197

199

198

# avoid silly behavior for parent -> working dir

200

# avoid silly behavior for parent -> working dir

199

if c2.node() is None and c1.node() == repo.dirstate.p1():

201

if c2.node() is None and c1.node() == repo.dirstate.p1():

200

return repo.dirstate.copies(), {}, {}, {}

202

return repo.dirstate.copies(), {}, {}, {}

201

203

202

limit = _findlimit(repo, c1.rev(), c2.rev())

204

limit = _findlimit(repo, c1.rev(), c2.rev())

203

if limit is None:

205

if limit is None:

204

# no common ancestor, no copies

206

# no common ancestor, no copies

205

return {}, {}, {}, {}

207

return {}, {}, {}, {}

206

m1 = c1.manifest()

208

m1 = c1.manifest()

207

m2 = c2.manifest()

209

m2 = c2.manifest()

208

ma = ca.manifest()

210

ma = ca.manifest()

209

211

210

def makectx(f, n):

212

def makectx(f, n):

211

if len(n) != 20: # in a working context?

213

if len(n) != 20: # in a working context?

212

if c1.rev() is None:

214

if c1.rev() is None:

213

return c1.filectx(f)

215

return c1.filectx(f)

214

return c2.filectx(f)

216

return c2.filectx(f)

215

return repo.filectx(f, fileid=n)

217

return repo.filectx(f, fileid=n)

216

218

217

ctx = util.lrucachefunc(makectx)

219

ctx = util.lrucachefunc(makectx)

218

copy = {}

220

copy = {}

219

movewithdir = {}

221

movewithdir = {}

220

fullcopy = {}

222

fullcopy = {}

221

diverge = {}

223

diverge = {}

222

224

223

def related(f1, f2, limit):

225

def related(f1, f2, limit):

224

# Walk back to common ancestor to see if the two files originate

226

# Walk back to common ancestor to see if the two files originate

225

# from the same file. Since workingfilectx's rev() is None it messes

227

# from the same file. Since workingfilectx's rev() is None it messes

226

# up the integer comparison logic, hence the pre-step check for

228

# up the integer comparison logic, hence the pre-step check for

227

# None (f1 and f2 can only be workingfilectx's initially).

229

# None (f1 and f2 can only be workingfilectx's initially).

228

230

229

if f1 == f2:

231

if f1 == f2:

230

return f1 # a match

232

return f1 # a match

231

233

232

g1, g2 = f1.ancestors(), f2.ancestors()

234

g1, g2 = f1.ancestors(), f2.ancestors()

233

try:

235

try:

234

f1r, f2r = f1.rev(), f2.rev()

236

f1r, f2r = f1.rev(), f2.rev()

235

237

236

if f1r is None:

238

if f1r is None:

237

f1 = g1.next()

239

f1 = g1.next()

238

if f2r is None:

240

if f2r is None:

239

f2 = g2.next()

241

f2 = g2.next()

240

242

241

while True:

243

while True:

242

f1r, f2r = f1.rev(), f2.rev()

244

f1r, f2r = f1.rev(), f2.rev()

243

if f1r > f2r:

245

if f1r > f2r:

244

f1 = g1.next()

246

f1 = g1.next()

245

elif f2r > f1r:

247

elif f2r > f1r:

246

f2 = g2.next()

248

f2 = g2.next()

247

elif f1 == f2:

249

elif f1 == f2:

248

return f1 # a match

250

return f1 # a match

249

elif f1r == f2r or f1r < limit or f2r < limit:

251

elif f1r == f2r or f1r < limit or f2r < limit:

250

return False # copy no longer relevant

252

return False # copy no longer relevant

251

except StopIteration:

253

except StopIteration:

252

return False

254

return False

253

255

254

def checkcopies(f, m1, m2):

256

def checkcopies(f, m1, m2):

255

'''check possible copies of f from m1 to m2'''

257

'''check possible copies of f from m1 to m2'''

256

of = None

258

of = None

257

seen = set([f])

259

seen = set([f])

258

for oc in ctx(f, m1[f]).ancestors():

260

for oc in ctx(f, m1[f]).ancestors():

259

ocr = oc.rev()

261

ocr = oc.rev()

260

of = oc.path()

262

of = oc.path()

261

if of in seen:

263

if of in seen:

262

# check limit late - grab last rename before

264

# check limit late - grab last rename before

263

if ocr < limit:

265

if ocr < limit:

264

break

266

break

265

continue

267

continue

266

seen.add(of)

268

seen.add(of)

267

269

268

fullcopy[f] = of # remember for dir rename detection

270

fullcopy[f] = of # remember for dir rename detection

269

if of not in m2:

271

if of not in m2:

270

continue # no match, keep looking

272

continue # no match, keep looking

271

if m2[of] == ma.get(of):

273

if m2[of] == ma.get(of):

272

break # no merge needed, quit early

274

break # no merge needed, quit early

273

c2 = ctx(of, m2[of])

275

c2 = ctx(of, m2[of])

274

cr = related(oc, c2, ca.rev())

276

cr = related(oc, c2, ca.rev())

275

if cr and (of == f or of == c2.path()): # non-divergent

277

if cr and (of == f or of == c2.path()): # non-divergent

276

copy[f] = of

278

copy[f] = of

277

of = None

279

of = None

278

break

280

break

279

281

280

if of in ma:

282

if of in ma:

281

diverge.setdefault(of, []).append(f)

283

diverge.setdefault(of, []).append(f)

282

284

283

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

285

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

284

286

285

u1 = _nonoverlap(m1, m2, ma)

287

u1 = _nonoverlap(m1, m2, ma)

286

u2 = _nonoverlap(m2, m1, ma)

288

u2 = _nonoverlap(m2, m1, ma)

287

289

288

if u1:

290

if u1:

289

repo.ui.debug(" unmatched files in local:\n %s\n"

291

repo.ui.debug(" unmatched files in local:\n %s\n"

290

% "\n ".join(u1))

292

% "\n ".join(u1))

291

if u2:

293

if u2:

292

repo.ui.debug(" unmatched files in other:\n %s\n"

294

repo.ui.debug(" unmatched files in other:\n %s\n"

293

% "\n ".join(u2))

295

% "\n ".join(u2))

294

296

295

for f in u1:

297

for f in u1:

296

checkcopies(f, m1, m2)

298

checkcopies(f, m1, m2)

297

for f in u2:

299

for f in u2:

298

checkcopies(f, m2, m1)

300

checkcopies(f, m2, m1)

299

301

300

renamedelete = {}

302

renamedelete = {}

301

renamedelete2 = set()

303

renamedelete2 = set()

302

diverge2 = set()

304

diverge2 = set()

303

for of, fl in diverge.items():

305

for of, fl in diverge.items():

304

if len(fl) == 1 or of in c1 or of in c2:

306

if len(fl) == 1 or of in c1 or of in c2:

305

del diverge[of] # not actually divergent, or not a rename

307

del diverge[of] # not actually divergent, or not a rename

306

if of not in c1 and of not in c2:

308

if of not in c1 and of not in c2:

307

# renamed on one side, deleted on the other side, but filter

309

# renamed on one side, deleted on the other side, but filter

308

# out files that have been renamed and then deleted

310

# out files that have been renamed and then deleted

309

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

311

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

310

renamedelete2.update(fl) # reverse map for below

312

renamedelete2.update(fl) # reverse map for below

311

else:

313

else:

312

diverge2.update(fl) # reverse map for below

314

diverge2.update(fl) # reverse map for below

313

315

314

if fullcopy:

316

if fullcopy:

315

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

317

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

316

"% = renamed and deleted):\n")

318

"% = renamed and deleted):\n")

317

for f in sorted(fullcopy):

319

for f in sorted(fullcopy):

318

note = ""

320

note = ""

319

if f in copy:

321

if f in copy:

320

note += "*"

322

note += "*"

321

if f in diverge2:

323

if f in diverge2:

322

note += "!"

324

note += "!"

323

if f in renamedelete2:

325

if f in renamedelete2:

324

note += "%"

326

note += "%"

325

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

327

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

326

note))

328

note))

327

del diverge2

329

del diverge2

328

330

329

if not fullcopy:

331

if not fullcopy:

330

return copy, movewithdir, diverge, renamedelete

332

return copy, movewithdir, diverge, renamedelete

331

333

332

repo.ui.debug(" checking for directory renames\n")

334

repo.ui.debug(" checking for directory renames\n")

333

335

334

# generate a directory move map

336

# generate a directory move map

335

d1, d2 = c1.dirs(), c2.dirs()

337

d1, d2 = c1.dirs(), c2.dirs()

336

d1.add('')

338

d1.add('')

337

d2.add('')

339

d2.add('')

338

invalid = set()

340

invalid = set()

339

dirmove = {}

341

dirmove = {}

340

342

341

# examine each file copy for a potential directory move, which is

343

# examine each file copy for a potential directory move, which is

342

# when all the files in a directory are moved to a new directory

344

# when all the files in a directory are moved to a new directory

343

for dst, src in fullcopy.iteritems():

345

for dst, src in fullcopy.iteritems():

344

dsrc, ddst = _dirname(src), _dirname(dst)

346

dsrc, ddst = _dirname(src), _dirname(dst)

345

if dsrc in invalid:

347

if dsrc in invalid:

346

# already seen to be uninteresting

348

# already seen to be uninteresting

347

continue

349

continue

348

elif dsrc in d1 and ddst in d1:

350

elif dsrc in d1 and ddst in d1:

349

# directory wasn't entirely moved locally

351

# directory wasn't entirely moved locally

350

invalid.add(dsrc)

352

invalid.add(dsrc)

351

elif dsrc in d2 and ddst in d2:

353

elif dsrc in d2 and ddst in d2:

352

# directory wasn't entirely moved remotely

354

# directory wasn't entirely moved remotely

353

invalid.add(dsrc)

355

invalid.add(dsrc)

354

elif dsrc in dirmove and dirmove[dsrc] != ddst:

356

elif dsrc in dirmove and dirmove[dsrc] != ddst:

355

# files from the same directory moved to two different places

357

# files from the same directory moved to two different places

356

invalid.add(dsrc)

358

invalid.add(dsrc)

357

else:

359

else:

358

# looks good so far

360

# looks good so far

359

dirmove[dsrc + "/"] = ddst + "/"

361

dirmove[dsrc + "/"] = ddst + "/"

360

362

361

for i in invalid:

363

for i in invalid:

362

if i in dirmove:

364

if i in dirmove:

363

del dirmove[i]

365

del dirmove[i]

364

del d1, d2, invalid

366

del d1, d2, invalid

365

367

366

if not dirmove:

368

if not dirmove:

367

return copy, movewithdir, diverge, renamedelete

369

return copy, movewithdir, diverge, renamedelete

368

370

369

for d in dirmove:

371

for d in dirmove:

370

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

372

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

371

(d, dirmove[d]))

373

(d, dirmove[d]))

372

374

373

# check unaccounted nonoverlapping files against directory moves

375

# check unaccounted nonoverlapping files against directory moves

374

for f in u1 + u2:

376

for f in u1 + u2:

375

if f not in fullcopy:

377

if f not in fullcopy:

376

for d in dirmove:

378

for d in dirmove:

377

if f.startswith(d):

379

if f.startswith(d):

378

# new file added in a directory that was moved, move it

380

# new file added in a directory that was moved, move it

379

df = dirmove[d] + f[len(d):]

381

df = dirmove[d] + f[len(d):]

380

if df not in copy:

382

if df not in copy:

381

movewithdir[f] = df

383

movewithdir[f] = df

382

repo.ui.debug((" pending file src: '%s' -> "

384

repo.ui.debug((" pending file src: '%s' -> "

383

"dst: '%s'\n") % (f, df))

385

"dst: '%s'\n") % (f, df))

384

break

386

break

385

387

386

return copy, movewithdir, diverge, renamedelete

388

return copy, movewithdir, diverge, renamedelete

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             import util
             import heapq
             def _nonoverlap(d1, d2, d3):
                 "Return list of elements in d1 not in d2 or d3"
                 return sorted([d for d in d1 if d not in d3 and d not in d2])
             def _dirname(f):
                 s = f.rfind("/")
                 if s == -1:
                     return ""
                 return f[:s]
             def _findlimit(repo, a, b):
                 """Find the earliest revision that's an ancestor of a or b but not both,
                 None if no such revision exists.
                 """
                 # basic idea:
                 # - mark a and b with different sides
                 # - if a parent's children are all on the same side, the parent is
                 #   on that side, otherwise it is on no side
                 # - walk the graph in topological order with the help of a heap;
                 #   - add unseen parents to side map
                 #   - clear side of any parent that has children on different sides
                 #   - track number of interesting revs that might still be on a side
                 #   - track the lowest interesting rev seen
                 #   - quit when interesting revs is zero
                 cl = repo.changelog
                 working = len(cl) # pseudo rev for the working directory
                 if a is None:
                     a = working
                 if b is None:
                     b = working
                 side = {a: -1, b: 1}
                 visit = [-a, -b]
                 heapq.heapify(visit)
                 interesting = len(visit)
                 hascommonancestor = False
                 limit = working
                 while interesting:
                     r = -heapq.heappop(visit)
                     if r == working:
                         parents = [cl.rev(p) for p in repo.dirstate.parents()]
                     else:
                         parents = cl.parentrevs(r)
                     for p in parents:
                         if p < 0:
                             continue
                         if p not in side:
                             # first time we see p; add it to visit
                             side[p] = side[r]
                             if side[p]:
                                 interesting += 1
                             heapq.heappush(visit, -p)
                         elif side[p] and side[p] != side[r]:
                             # p was interesting but now we know better
                             side[p] = 0
                             interesting -= 1
                             hascommonancestor = True
                     if side[r]:
                         limit = r # lowest rev visited
                         interesting -= 1
                 if not hascommonancestor:
                     return None
                 return limit
             def _chain(src, dst, a, b):
                 '''chain two sets of copies a->b'''
                 t = a.copy()
                 for k, v in b.iteritems():
                     if v in t:
                         # found a chain
                         if t[v] != k:
                             # file wasn't renamed back to itself
                             t[k] = t[v]
                         if v not in dst:
                             # chain was a rename, not a copy
                             del t[v]
                     if v in src:
                         # file is a copy of an existing file
                         t[k] = v
                 # remove criss-crossed copies
                 for k, v in t.items():
                     if k in src and v in dst:
                         del t[k]
                 return t
             def _tracefile(fctx, actx):
                 '''return file context that is the ancestor of fctx present in actx'''
                 stop = actx.rev()
                 am = actx.manifest()
                 for f in fctx.ancestors():
                     if am.get(f.path(), None) == f.filenode():
                         return f
                     if f.rev() < stop:
                         return None
             def _dirstatecopies(d):
                 ds = d._repo.dirstate
                 c = ds.copies().copy()
                 for k in c.keys():
                     if ds[k] not in 'anm':
                         del c[k]
                 return c
             def _forwardcopies(a, b):
                 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
                 # check for working copy
                 w = None
                 if b.rev() is None:
                     w = b
                     b = w.p1()
                     if a == b:
                         # short-circuit to avoid issues with merge states
                         return _dirstatecopies(w)
                 # find where new files came from
                 # we currently don't try to find where old files went, too expensive
                 # this means we can miss a case like 'hg rm b; hg cp a b'
                 cm = {}
-                for f in b:
+                missing = set(b.manifest().iterkeys())
-                    if f not in a:
+                missing.difference_update(a.manifest().iterkeys())
+                for f in missing:
                     ofctx = _tracefile(b[f], a)
                     if ofctx:
                         cm[f] = ofctx.path()
                 # combine copies from dirstate if necessary
                 if w is not None:
                     cm = _chain(a, w, cm, _dirstatecopies(w))
                 return cm
             def _backwardrenames(a, b):
                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
                 f = _forwardcopies(b, a)
                 r = {}
                 for k, v in sorted(f.iteritems()):
                     # remove copies
                     if v in a:
                         continue
                     r[v] = k
                 return r
             def pathcopies(x, y):
                 '''find {dst@y: src@x} copy mapping for directed compare'''
                 if x == y or not x or not y:
                     return {}
                 a = y.ancestor(x)
                 if a == x:
                     return _forwardcopies(x, y)
                 if a == y:
                     return _backwardrenames(x, y)
                 return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))
             def mergecopies(repo, c1, c2, ca):
                 """
                 Find moves and copies between context c1 and c2 that are relevant
                 for merging.
                 Returns four dicts: "copy", "movewithdir", "diverge", and
                 "renamedelete".
                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.
                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.
                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.
                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return {}, {}, {}, {}
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return repo.dirstate.copies(), {}, {}, {}
                 limit = _findlimit(repo, c1.rev(), c2.rev())
                 if limit is None:
                     # no common ancestor, no copies
                     return {}, {}, {}, {}
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 ma = ca.manifest()
                 def makectx(f, n):
                     if len(n) != 20: # in a working context?
                         if c1.rev() is None:
                             return c1.filectx(f)
                         return c2.filectx(f)
                     return repo.filectx(f, fileid=n)
                 ctx = util.lrucachefunc(makectx)
                 copy = {}
                 movewithdir = {}
                 fullcopy = {}
                 diverge = {}
                 def related(f1, f2, limit):
                     # Walk back to common ancestor to see if the two files originate
                     # from the same file. Since workingfilectx's rev() is None it messes
                     # up the integer comparison logic, hence the pre-step check for
                     # None (f1 and f2 can only be workingfilectx's initially).
                     if f1 == f2:
                         return f1 # a match
                     g1, g2 = f1.ancestors(), f2.ancestors()
                     try:
                         f1r, f2r = f1.rev(), f2.rev()
                         if f1r is None:
                             f1 = g1.next()
                         if f2r is None:
                             f2 = g2.next()
                         while True:
                             f1r, f2r = f1.rev(), f2.rev()
                             if f1r > f2r:
                                 f1 = g1.next()
                             elif f2r > f1r:
                                 f2 = g2.next()
                             elif f1 == f2:
                                 return f1 # a match
                             elif f1r == f2r or f1r < limit or f2r < limit:
                                 return False # copy no longer relevant
                     except StopIteration:
                         return False
                 def checkcopies(f, m1, m2):
                     '''check possible copies of f from m1 to m2'''
                     of = None
                     seen = set([f])
                     for oc in ctx(f, m1[f]).ancestors():
                         ocr = oc.rev()
                         of = oc.path()
                         if of in seen:
                             # check limit late - grab last rename before
                             if ocr < limit:
                                 break
                             continue
                         seen.add(of)
                         fullcopy[f] = of # remember for dir rename detection
                         if of not in m2:
                             continue # no match, keep looking
                         if m2[of] == ma.get(of):
                             break # no merge needed, quit early
                         c2 = ctx(of, m2[of])
                         cr = related(oc, c2, ca.rev())
                         if cr and (of == f or of == c2.path()): # non-divergent
                             copy[f] = of
                             of = None
                             break
                     if of in ma:
                         diverge.setdefault(of, []).append(f)
                 repo.ui.debug("  searching for copies back to rev %d\n" % limit)
                 u1 = _nonoverlap(m1, m2, ma)
                 u2 = _nonoverlap(m2, m1, ma)
                 if u1:
                     repo.ui.debug("  unmatched files in local:\n   %s\n"
                                   % "\n   ".join(u1))
                 if u2:
                     repo.ui.debug("  unmatched files in other:\n   %s\n"
                                   % "\n   ".join(u2))
                 for f in u1:
                     checkcopies(f, m1, m2)
                 for f in u2:
                     checkcopies(f, m2, m1)
                 renamedelete = {}
                 renamedelete2 = set()
                 diverge2 = set()
                 for of, fl in diverge.items():
                     if len(fl) == 1 or of in c1 or of in c2:
                         del diverge[of] # not actually divergent, or not a rename
                         if of not in c1 and of not in c2:
                             # renamed on one side, deleted on the other side, but filter
                             # out files that have been renamed and then deleted
                             renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                             renamedelete2.update(fl) # reverse map for below
                     else:
                         diverge2.update(fl) # reverse map for below
                 if fullcopy:
                     repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                                   "% = renamed and deleted):\n")
                     for f in sorted(fullcopy):
                         note = ""
                         if f in copy:
                             note += "*"
                         if f in diverge2:
                             note += "!"
                         if f in renamedelete2:
                             note += "%"
                         repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                                           note))
                 del diverge2
                 if not fullcopy:
                     return copy, movewithdir, diverge, renamedelete
                 repo.ui.debug("  checking for directory renames\n")
                 # generate a directory move map
                 d1, d2 = c1.dirs(), c2.dirs()
                 d1.add('')
                 d2.add('')
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in fullcopy.iteritems():
                     dsrc, ddst = _dirname(src), _dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif dsrc in d1 and ddst in d1:
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc)
                     elif dsrc in d2 and ddst in d2:
                         # directory wasn't entirely moved remotely
                         invalid.add(dsrc)
                     elif dsrc in dirmove and dirmove[dsrc] != ddst:
                         # files from the same directory moved to two different places
                         invalid.add(dsrc)
                     else:
                         # looks good so far
                         dirmove[dsrc + "/"] = ddst + "/"
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del d1, d2, invalid
                 if not dirmove:
                     return copy, movewithdir, diverge, renamedelete
                 for d in dirmove:
                     repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                                   (d, dirmove[d]))
                 # check unaccounted nonoverlapping files against directory moves
                 for f in u1 + u2:
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d):]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(("   pending file src: '%s' -> "
                                                    "dst: '%s'\n") % (f, df))
                                 break
                 return copy, movewithdir, diverge, renamedelete