copies: pass changectx instead of manifest to _computenonoverlap...
Durham Goode
r24625:2cebf17c default
@@ -1,517 +1,517 @@
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import util
8 import util
9 import heapq
9 import heapq
10
10
11 def _dirname(f):
11 def _dirname(f):
12 s = f.rfind("/")
12 s = f.rfind("/")
13 if s == -1:
13 if s == -1:
14 return ""
14 return ""
15 return f[:s]
15 return f[:s]
16
16
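As a quick illustration of _dirname above (toy path strings only, nothing repository-specific): it keeps everything before the last slash and returns an empty string for top-level files.

# Illustration only; plain strings, no Mercurial objects involved.
for f in ("copies.py", "mercurial/copies.py", "a/b/c.txt"):
    s = f.rfind("/")
    print("%s -> %r" % (f, f[:s] if s != -1 else ""))
# copies.py -> ''
# mercurial/copies.py -> 'mercurial'
# a/b/c.txt -> 'a/b'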
17 def _findlimit(repo, a, b):
17 def _findlimit(repo, a, b):
18 """
18 """
19 Find the last revision that needs to be checked to ensure that a full
19 Find the last revision that needs to be checked to ensure that a full
20 transitive closure for file copies can be properly calculated.
20 transitive closure for file copies can be properly calculated.
21 Generally, this means finding the earliest revision number that's an
21 Generally, this means finding the earliest revision number that's an
22 ancestor of a or b but not both, except when a or b is a direct descendant
22 ancestor of a or b but not both, except when a or b is a direct descendant
23 of the other, in which case we can return the minimum revnum of a and b.
23 of the other, in which case we can return the minimum revnum of a and b.
24 None if no such revision exists.
24 None if no such revision exists.
25 """
25 """
26
26
27 # basic idea:
27 # basic idea:
28 # - mark a and b with different sides
28 # - mark a and b with different sides
29 # - if a parent's children are all on the same side, the parent is
29 # - if a parent's children are all on the same side, the parent is
30 # on that side, otherwise it is on no side
30 # on that side, otherwise it is on no side
31 # - walk the graph in topological order with the help of a heap;
31 # - walk the graph in topological order with the help of a heap;
32 # - add unseen parents to side map
32 # - add unseen parents to side map
33 # - clear side of any parent that has children on different sides
33 # - clear side of any parent that has children on different sides
34 # - track number of interesting revs that might still be on a side
34 # - track number of interesting revs that might still be on a side
35 # - track the lowest interesting rev seen
35 # - track the lowest interesting rev seen
36 # - quit when interesting revs is zero
36 # - quit when interesting revs is zero
37
37
38 cl = repo.changelog
38 cl = repo.changelog
39 working = len(cl) # pseudo rev for the working directory
39 working = len(cl) # pseudo rev for the working directory
40 if a is None:
40 if a is None:
41 a = working
41 a = working
42 if b is None:
42 if b is None:
43 b = working
43 b = working
44
44
45 side = {a: -1, b: 1}
45 side = {a: -1, b: 1}
46 visit = [-a, -b]
46 visit = [-a, -b]
47 heapq.heapify(visit)
47 heapq.heapify(visit)
48 interesting = len(visit)
48 interesting = len(visit)
49 hascommonancestor = False
49 hascommonancestor = False
50 limit = working
50 limit = working
51
51
52 while interesting:
52 while interesting:
53 r = -heapq.heappop(visit)
53 r = -heapq.heappop(visit)
54 if r == working:
54 if r == working:
55 parents = [cl.rev(p) for p in repo.dirstate.parents()]
55 parents = [cl.rev(p) for p in repo.dirstate.parents()]
56 else:
56 else:
57 parents = cl.parentrevs(r)
57 parents = cl.parentrevs(r)
58 for p in parents:
58 for p in parents:
59 if p < 0:
59 if p < 0:
60 continue
60 continue
61 if p not in side:
61 if p not in side:
62 # first time we see p; add it to visit
62 # first time we see p; add it to visit
63 side[p] = side[r]
63 side[p] = side[r]
64 if side[p]:
64 if side[p]:
65 interesting += 1
65 interesting += 1
66 heapq.heappush(visit, -p)
66 heapq.heappush(visit, -p)
67 elif side[p] and side[p] != side[r]:
67 elif side[p] and side[p] != side[r]:
68 # p was interesting but now we know better
68 # p was interesting but now we know better
69 side[p] = 0
69 side[p] = 0
70 interesting -= 1
70 interesting -= 1
71 hascommonancestor = True
71 hascommonancestor = True
72 if side[r]:
72 if side[r]:
73 limit = r # lowest rev visited
73 limit = r # lowest rev visited
74 interesting -= 1
74 interesting -= 1
75
75
76 if not hascommonancestor:
76 if not hascommonancestor:
77 return None
77 return None
78
78
79 # Consider the following flow (see test-commit-amend.t under issue4405):
79 # Consider the following flow (see test-commit-amend.t under issue4405):
80 # 1/ File 'a0' committed
80 # 1/ File 'a0' committed
81 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
81 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
82 # 3/ Move back to first commit
82 # 3/ Move back to first commit
83 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
83 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
84 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
84 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
85 #
85 #
86 # During the amend in step five, we will be in this state:
86 # During the amend in step five, we will be in this state:
87 #
87 #
88 # @ 3 temporary amend commit for a1-amend
88 # @ 3 temporary amend commit for a1-amend
89 # |
89 # |
90 # o 2 a1-amend
90 # o 2 a1-amend
91 # |
91 # |
92 # | o 1 a1
92 # | o 1 a1
93 # |/
93 # |/
94 # o 0 a0
94 # o 0 a0
95 #
95 #
96 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
96 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
97 # yet the filelog has the copy information in rev 1 and we will not look
97 # yet the filelog has the copy information in rev 1 and we will not look
98 # back far enough unless we also look at the a and b as candidates.
98 # back far enough unless we also look at the a and b as candidates.
99 # This only occurs when a is a descendant of b or vice-versa.
99 # This only occurs when a is a descendant of b or vice-versa.
100 return min(limit, a, b)
100 return min(limit, a, b)
101
101
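One detail worth calling out in _findlimit above: revisions are pushed onto the heap negated (visit = [-a, -b]) so that Python's min-heap pops the highest revision number first, i.e. descendants are processed before their ancestors. A standalone sketch of just that trick, using made-up revision numbers:

import heapq

visit = [-5, -9, -2]          # revisions 5, 9 and 2, stored negated
heapq.heapify(visit)
order = []
while visit:
    order.append(-heapq.heappop(visit))
print(order)                  # [9, 5, 2]: highest revision number first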
102 def _chain(src, dst, a, b):
102 def _chain(src, dst, a, b):
103 '''chain two sets of copies a->b'''
103 '''chain two sets of copies a->b'''
104 t = a.copy()
104 t = a.copy()
105 for k, v in b.iteritems():
105 for k, v in b.iteritems():
106 if v in t:
106 if v in t:
107 # found a chain
107 # found a chain
108 if t[v] != k:
108 if t[v] != k:
109 # file wasn't renamed back to itself
109 # file wasn't renamed back to itself
110 t[k] = t[v]
110 t[k] = t[v]
111 if v not in dst:
111 if v not in dst:
112 # chain was a rename, not a copy
112 # chain was a rename, not a copy
113 del t[v]
113 del t[v]
114 if v in src:
114 if v in src:
115 # file is a copy of an existing file
115 # file is a copy of an existing file
116 t[k] = v
116 t[k] = v
117
117
118 # remove criss-crossed copies
118 # remove criss-crossed copies
119 for k, v in t.items():
119 for k, v in t.items():
120 if k in src and v in dst:
120 if k in src and v in dst:
121 del t[k]
121 del t[k]
122
122
123 return t
123 return t
124
124
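To make the chaining concrete, here is a standalone trace of the same logic (a transcription of _chain above with items() instead of iteritems() and a list() around the second loop so it also runs on Python 3); src and dst only need to support membership tests, so plain sets stand in for the changectxs:

def chain(src, dst, a, b):
    t = a.copy()
    for k, v in b.items():
        if v in t:
            # found a chain
            if t[v] != k:
                # file wasn't renamed back to itself
                t[k] = t[v]
            if v not in dst:
                # chain was a rename, not a copy
                del t[v]
        if v in src:
            # file is a copy of an existing file
            t[k] = v
    # remove criss-crossed copies
    for k, v in list(t.items()):
        if k in src and v in dst:
            del t[k]
    return t

# 'a' was renamed to 'b' over the first range, then 'b' to 'c' over the second
first = {'b': 'a'}
second = {'c': 'b'}
print(chain({'a'}, {'c'}, first, second))   # {'c': 'a'}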
125 def _tracefile(fctx, am, limit=-1):
125 def _tracefile(fctx, am, limit=-1):
126 '''return file context that is the ancestor of fctx present in ancestor
126 '''return file context that is the ancestor of fctx present in ancestor
127 manifest am, stopping after the first ancestor lower than limit'''
127 manifest am, stopping after the first ancestor lower than limit'''
128
128
129 for f in fctx.ancestors():
129 for f in fctx.ancestors():
130 if am.get(f.path(), None) == f.filenode():
130 if am.get(f.path(), None) == f.filenode():
131 return f
131 return f
132 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
132 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
133 return None
133 return None
134
134
135 def _dirstatecopies(d):
135 def _dirstatecopies(d):
136 ds = d._repo.dirstate
136 ds = d._repo.dirstate
137 c = ds.copies().copy()
137 c = ds.copies().copy()
138 for k in c.keys():
138 for k in c.keys():
139 if ds[k] not in 'anm':
139 if ds[k] not in 'anm':
140 del c[k]
140 del c[k]
141 return c
141 return c
142
142
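The filter in _dirstatecopies keeps a copy record only when the destination file's dirstate status is 'a' (added), 'n' (normal) or 'm' (merged); records for files in any other state, such as removed, are dropped. A toy model of that filtering, with plain dicts standing in for the dirstate:

status = {'renamed.txt': 'a', 'dropped.txt': 'r'}            # fake dirstate states
copymap = {'renamed.txt': 'orig.txt', 'dropped.txt': 'old.txt'}
kept = dict((k, v) for k, v in copymap.items() if status[k] in 'anm')
print(kept)        # {'renamed.txt': 'orig.txt'}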
143 def _computeforwardmissing(a, b):
143 def _computeforwardmissing(a, b):
144 """Computes which files are in b but not a.
144 """Computes which files are in b but not a.
145 This is its own function so extensions can easily wrap this call to see what
145 This is its own function so extensions can easily wrap this call to see what
146 files _forwardcopies is about to process.
146 files _forwardcopies is about to process.
147 """
147 """
148 return b.manifest().filesnotin(a.manifest())
148 return b.manifest().filesnotin(a.manifest())
149
149
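manifest.filesnotin() behaves like a set difference on file names, so the result here is the candidate set of files that may have been copied or renamed into b. A toy equivalent with plain sets:

ma = {'a.txt', 'b.txt'}                 # files present in ancestor a
mb = {'a.txt', 'b.txt', 'new.txt'}      # files present in descendant b
print(sorted(mb - ma))                  # ['new.txt']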
150 def _forwardcopies(a, b):
150 def _forwardcopies(a, b):
151 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
151 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
152
152
153 # check for working copy
153 # check for working copy
154 w = None
154 w = None
155 if b.rev() is None:
155 if b.rev() is None:
156 w = b
156 w = b
157 b = w.p1()
157 b = w.p1()
158 if a == b:
158 if a == b:
159 # short-circuit to avoid issues with merge states
159 # short-circuit to avoid issues with merge states
160 return _dirstatecopies(w)
160 return _dirstatecopies(w)
161
161
162 # files might have to be traced back to the fctx parent of the last
162 # files might have to be traced back to the fctx parent of the last
163 # one-side-only changeset, but not further back than that
163 # one-side-only changeset, but not further back than that
164 limit = _findlimit(a._repo, a.rev(), b.rev())
164 limit = _findlimit(a._repo, a.rev(), b.rev())
165 if limit is None:
165 if limit is None:
166 limit = -1
166 limit = -1
167 am = a.manifest()
167 am = a.manifest()
168
168
169 # find where new files came from
169 # find where new files came from
170 # we currently don't try to find where old files went, too expensive
170 # we currently don't try to find where old files went, too expensive
171 # this means we can miss a case like 'hg rm b; hg cp a b'
171 # this means we can miss a case like 'hg rm b; hg cp a b'
172 cm = {}
172 cm = {}
173 missing = _computeforwardmissing(a, b)
173 missing = _computeforwardmissing(a, b)
174 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
174 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
175 for f in missing:
175 for f in missing:
176 fctx = b[f]
176 fctx = b[f]
177 fctx._ancestrycontext = ancestrycontext
177 fctx._ancestrycontext = ancestrycontext
178 ofctx = _tracefile(fctx, am, limit)
178 ofctx = _tracefile(fctx, am, limit)
179 if ofctx:
179 if ofctx:
180 cm[f] = ofctx.path()
180 cm[f] = ofctx.path()
181
181
182 # combine copies from dirstate if necessary
182 # combine copies from dirstate if necessary
183 if w is not None:
183 if w is not None:
184 cm = _chain(a, w, cm, _dirstatecopies(w))
184 cm = _chain(a, w, cm, _dirstatecopies(w))
185
185
186 return cm
186 return cm
187
187
188 def _backwardrenames(a, b):
188 def _backwardrenames(a, b):
189 # Even though we're not taking copies into account, 1:n rename situations
189 # Even though we're not taking copies into account, 1:n rename situations
190 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
190 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
191 # arbitrarily pick one of the renames.
191 # arbitrarily pick one of the renames.
192 f = _forwardcopies(b, a)
192 f = _forwardcopies(b, a)
193 r = {}
193 r = {}
194 for k, v in sorted(f.iteritems()):
194 for k, v in sorted(f.iteritems()):
195 # remove copies
195 # remove copies
196 if v in a:
196 if v in a:
197 continue
197 continue
198 r[v] = k
198 r[v] = k
199 return r
199 return r
200
200
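A standalone trace of the inversion _backwardrenames performs: going from a context where a file is called 'new' back to an ancestor where it was 'old', the forward map {'new': 'old'} is flipped, but only when 'old' no longer exists on the newer side (a rename rather than a copy). Plain data only, no repository:

def backward(forward, names_in_a):
    # forward maps {name in a: name in b}; keep true renames and flip them
    r = {}
    for k, v in sorted(forward.items()):
        if v in names_in_a:
            # source still exists on the newer side: it was a copy, skip it
            continue
        r[v] = k
    return r

print(backward({'new': 'old'}, {'new'}))             # {'old': 'new'}  (rename)
print(backward({'copy': 'orig'}, {'copy', 'orig'}))  # {}              (copy, dropped)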
201 def pathcopies(x, y):
201 def pathcopies(x, y):
202 '''find {dst@y: src@x} copy mapping for directed compare'''
202 '''find {dst@y: src@x} copy mapping for directed compare'''
203 if x == y or not x or not y:
203 if x == y or not x or not y:
204 return {}
204 return {}
205 a = y.ancestor(x)
205 a = y.ancestor(x)
206 if a == x:
206 if a == x:
207 return _forwardcopies(x, y)
207 return _forwardcopies(x, y)
208 if a == y:
208 if a == y:
209 return _backwardrenames(x, y)
209 return _backwardrenames(x, y)
210 return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))
210 return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))
211
211
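pathcopies is the main entry point for "what got copied between these two revisions". A hedged usage sketch, assuming it is run from the root of a working copy with a Mercurial of this vintage importable; the revisions chosen below are just one plausible pairing:

from mercurial import ui as uimod, hg, copies

repo = hg.repository(uimod.ui(), '.')    # repository in the current directory
old = repo['.']                          # working directory parent
new = repo[None]                         # the working directory itself
for dst, src in sorted(copies.pathcopies(old, new).items()):
    print("%s -> %s" % (src, dst))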
212 def _computenonoverlap(repo, m1, m2, addedinm1, addedinm2):
212 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):
213 """Computes, based on addedinm1 and addedinm2, the files exclusive to m1
213 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
214 and m2. This is its own function so extensions can easily wrap this call
214 and c2. This is its own function so extensions can easily wrap this call
215 to see what files mergecopies is about to process.
215 to see what files mergecopies is about to process.
216
216
217 Even though m1 and m2 are not used in this function, they are useful in
217 Even though c1 and c2 are not used in this function, they are useful in
218 other extensions for being able to read the file nodes of the changed files.
218 other extensions for being able to read the file nodes of the changed files.
219 """
219 """
220 u1 = sorted(addedinm1 - addedinm2)
220 u1 = sorted(addedinm1 - addedinm2)
221 u2 = sorted(addedinm2 - addedinm1)
221 u2 = sorted(addedinm2 - addedinm1)
222
222
223 if u1:
223 if u1:
224 repo.ui.debug(" unmatched files in local:\n %s\n"
224 repo.ui.debug(" unmatched files in local:\n %s\n"
225 % "\n ".join(u1))
225 % "\n ".join(u1))
226 if u2:
226 if u2:
227 repo.ui.debug(" unmatched files in other:\n %s\n"
227 repo.ui.debug(" unmatched files in other:\n %s\n"
228 % "\n ".join(u2))
228 % "\n ".join(u2))
229 return u1, u2
229 return u1, u2
230
230
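This is the function the changeset touches: it now receives the changectxs c1 and c2 (instead of their manifests) even though it only uses the two "added" sets, precisely so that wrapping extensions can reach file nodes. A hedged sketch of such a wrapper; the extension name, the wrapper function and the prefetch idea are made-up illustrations, but extensions.wrapfunction and extsetup are the standard extension hooks:

from mercurial import copies, extensions

def _prefetchnonoverlap(orig, repo, c1, c2, addedinm1, addedinm2):
    u1, u2 = orig(repo, c1, c2, addedinm1, addedinm2)
    # with changectxs available, the wrapper can read file nodes, e.g. to
    # prefetch file data for everything mergecopies is about to inspect
    nodes = [c1.manifest().get(f) for f in u1] + \
            [c2.manifest().get(f) for f in u2]
    # (a real extension would hand 'nodes' to its prefetch machinery here)
    return u1, u2

def extsetup(ui):
    extensions.wrapfunction(copies, '_computenonoverlap', _prefetchnonoverlap)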
231 def mergecopies(repo, c1, c2, ca):
231 def mergecopies(repo, c1, c2, ca):
232 """
232 """
233 Find moves and copies between context c1 and c2 that are relevant
233 Find moves and copies between context c1 and c2 that are relevant
234 for merging.
234 for merging.
235
235
236 Returns four dicts: "copy", "movewithdir", "diverge", and
236 Returns four dicts: "copy", "movewithdir", "diverge", and
237 "renamedelete".
237 "renamedelete".
238
238
239 "copy" is a mapping from destination name -> source name,
239 "copy" is a mapping from destination name -> source name,
240 where source is in c1 and destination is in c2 or vice-versa.
240 where source is in c1 and destination is in c2 or vice-versa.
241
241
242 "movewithdir" is a mapping from source name -> destination name,
242 "movewithdir" is a mapping from source name -> destination name,
243 where the file at source present in one context but not the other
243 where the file at source present in one context but not the other
244 needs to be moved to destination by the merge process, because the
244 needs to be moved to destination by the merge process, because the
245 other context moved the directory it is in.
245 other context moved the directory it is in.
246
246
247 "diverge" is a mapping of source name -> list of destination names
247 "diverge" is a mapping of source name -> list of destination names
248 for divergent renames.
248 for divergent renames.
249
249
250 "renamedelete" is a mapping of source name -> list of destination
250 "renamedelete" is a mapping of source name -> list of destination
251 names for files deleted in c1 that were renamed in c2 or vice-versa.
251 names for files deleted in c1 that were renamed in c2 or vice-versa.
252 """
252 """
253 # avoid silly behavior for update from empty dir
253 # avoid silly behavior for update from empty dir
254 if not c1 or not c2 or c1 == c2:
254 if not c1 or not c2 or c1 == c2:
255 return {}, {}, {}, {}
255 return {}, {}, {}, {}
256
256
257 # avoid silly behavior for parent -> working dir
257 # avoid silly behavior for parent -> working dir
258 if c2.node() is None and c1.node() == repo.dirstate.p1():
258 if c2.node() is None and c1.node() == repo.dirstate.p1():
259 return repo.dirstate.copies(), {}, {}, {}
259 return repo.dirstate.copies(), {}, {}, {}
260
260
261 limit = _findlimit(repo, c1.rev(), c2.rev())
261 limit = _findlimit(repo, c1.rev(), c2.rev())
262 if limit is None:
262 if limit is None:
263 # no common ancestor, no copies
263 # no common ancestor, no copies
264 return {}, {}, {}, {}
264 return {}, {}, {}, {}
265 m1 = c1.manifest()
265 m1 = c1.manifest()
266 m2 = c2.manifest()
266 m2 = c2.manifest()
267 ma = ca.manifest()
267 ma = ca.manifest()
268
268
269
269
270 def setupctx(ctx):
270 def setupctx(ctx):
271 """return a 'makectx' function suitable for checkcopies usage from ctx
271 """return a 'makectx' function suitable for checkcopies usage from ctx
272
272
273 We have to re-setup the function building 'filectx' for each
273 We have to re-setup the function building 'filectx' for each
274 'checkcopies' to ensure the linkrev adjustment is properly set up for
274 'checkcopies' to ensure the linkrev adjustment is properly set up for
275 each. Linkrev adjustment is important to avoid bugs in rename
275 each. Linkrev adjustment is important to avoid bugs in rename
276 detection. Moreover, having a proper '_ancestrycontext' setup ensures
276 detection. Moreover, having a proper '_ancestrycontext' setup ensures
277 the performance impact of this adjustment is kept limited. Without it,
277 the performance impact of this adjustment is kept limited. Without it,
278 each file could do a full dag traversal making the time complexity of
278 each file could do a full dag traversal making the time complexity of
279 the operation explode (see issue4537).
279 the operation explode (see issue4537).
280
280
281 This function exists here mostly to limit the impact on stable. Feel
281 This function exists here mostly to limit the impact on stable. Feel
282 free to refactor on default.
282 free to refactor on default.
283 """
283 """
284 rev = ctx.rev()
284 rev = ctx.rev()
285 ac = getattr(ctx, '_ancestrycontext', None)
285 ac = getattr(ctx, '_ancestrycontext', None)
286 if ac is None:
286 if ac is None:
287 revs = [rev]
287 revs = [rev]
288 if rev is None:
288 if rev is None:
289 revs = [p.rev() for p in ctx.parents()]
289 revs = [p.rev() for p in ctx.parents()]
290 ac = ctx._repo.changelog.ancestors(revs, inclusive=True)
290 ac = ctx._repo.changelog.ancestors(revs, inclusive=True)
291 ctx._ancestrycontext = ac
291 ctx._ancestrycontext = ac
292 def makectx(f, n):
292 def makectx(f, n):
293 if len(n) != 20: # in a working context?
293 if len(n) != 20: # in a working context?
294 if c1.rev() is None:
294 if c1.rev() is None:
295 return c1.filectx(f)
295 return c1.filectx(f)
296 return c2.filectx(f)
296 return c2.filectx(f)
297 fctx = repo.filectx(f, fileid=n)
297 fctx = repo.filectx(f, fileid=n)
298 # setup is only needed for a filectx not created from a changectx
298 # setup is only needed for a filectx not created from a changectx
299 fctx._ancestrycontext = ac
299 fctx._ancestrycontext = ac
300 fctx._descendantrev = rev
300 fctx._descendantrev = rev
301 return fctx
301 return fctx
302 return util.lrucachefunc(makectx)
302 return util.lrucachefunc(makectx)
303
303
304 copy = {}
304 copy = {}
305 movewithdir = {}
305 movewithdir = {}
306 fullcopy = {}
306 fullcopy = {}
307 diverge = {}
307 diverge = {}
308
308
309 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
309 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
310
310
311 addedinm1 = m1.filesnotin(ma)
311 addedinm1 = m1.filesnotin(ma)
312 addedinm2 = m2.filesnotin(ma)
312 addedinm2 = m2.filesnotin(ma)
313 u1, u2 = _computenonoverlap(repo, m1, m2, addedinm1, addedinm2)
313 u1, u2 = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
314
314
315 for f in u1:
315 for f in u1:
316 ctx = setupctx(c1)
316 ctx = setupctx(c1)
317 checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)
317 checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)
318
318
319 for f in u2:
319 for f in u2:
320 ctx = setupctx(c2)
320 ctx = setupctx(c2)
321 checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)
321 checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)
322
322
323 renamedelete = {}
323 renamedelete = {}
324 renamedelete2 = set()
324 renamedelete2 = set()
325 diverge2 = set()
325 diverge2 = set()
326 for of, fl in diverge.items():
326 for of, fl in diverge.items():
327 if len(fl) == 1 or of in c1 or of in c2:
327 if len(fl) == 1 or of in c1 or of in c2:
328 del diverge[of] # not actually divergent, or not a rename
328 del diverge[of] # not actually divergent, or not a rename
329 if of not in c1 and of not in c2:
329 if of not in c1 and of not in c2:
330 # renamed on one side, deleted on the other side, but filter
330 # renamed on one side, deleted on the other side, but filter
331 # out files that have been renamed and then deleted
331 # out files that have been renamed and then deleted
332 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
332 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
333 renamedelete2.update(fl) # reverse map for below
333 renamedelete2.update(fl) # reverse map for below
334 else:
334 else:
335 diverge2.update(fl) # reverse map for below
335 diverge2.update(fl) # reverse map for below
336
336
337 bothnew = sorted(addedinm1 & addedinm2)
337 bothnew = sorted(addedinm1 & addedinm2)
338 if bothnew:
338 if bothnew:
339 repo.ui.debug(" unmatched files new in both:\n %s\n"
339 repo.ui.debug(" unmatched files new in both:\n %s\n"
340 % "\n ".join(bothnew))
340 % "\n ".join(bothnew))
341 bothdiverge, _copy, _fullcopy = {}, {}, {}
341 bothdiverge, _copy, _fullcopy = {}, {}, {}
342 for f in bothnew:
342 for f in bothnew:
343 ctx = setupctx(c1)
343 ctx = setupctx(c1)
344 checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
344 checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
345 ctx = setupctx(c2)
345 ctx = setupctx(c2)
346 checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
346 checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
347 for of, fl in bothdiverge.items():
347 for of, fl in bothdiverge.items():
348 if len(fl) == 2 and fl[0] == fl[1]:
348 if len(fl) == 2 and fl[0] == fl[1]:
349 copy[fl[0]] = of # not actually divergent, just matching renames
349 copy[fl[0]] = of # not actually divergent, just matching renames
350
350
351 if fullcopy and repo.ui.debugflag:
351 if fullcopy and repo.ui.debugflag:
352 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
352 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
353 "% = renamed and deleted):\n")
353 "% = renamed and deleted):\n")
354 for f in sorted(fullcopy):
354 for f in sorted(fullcopy):
355 note = ""
355 note = ""
356 if f in copy:
356 if f in copy:
357 note += "*"
357 note += "*"
358 if f in diverge2:
358 if f in diverge2:
359 note += "!"
359 note += "!"
360 if f in renamedelete2:
360 if f in renamedelete2:
361 note += "%"
361 note += "%"
362 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
362 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
363 note))
363 note))
364 del diverge2
364 del diverge2
365
365
366 if not fullcopy:
366 if not fullcopy:
367 return copy, movewithdir, diverge, renamedelete
367 return copy, movewithdir, diverge, renamedelete
368
368
369 repo.ui.debug(" checking for directory renames\n")
369 repo.ui.debug(" checking for directory renames\n")
370
370
371 # generate a directory move map
371 # generate a directory move map
372 d1, d2 = c1.dirs(), c2.dirs()
372 d1, d2 = c1.dirs(), c2.dirs()
373 d1.addpath('/')
373 d1.addpath('/')
374 d2.addpath('/')
374 d2.addpath('/')
375 invalid = set()
375 invalid = set()
376 dirmove = {}
376 dirmove = {}
377
377
378 # examine each file copy for a potential directory move, which is
378 # examine each file copy for a potential directory move, which is
379 # when all the files in a directory are moved to a new directory
379 # when all the files in a directory are moved to a new directory
380 for dst, src in fullcopy.iteritems():
380 for dst, src in fullcopy.iteritems():
381 dsrc, ddst = _dirname(src), _dirname(dst)
381 dsrc, ddst = _dirname(src), _dirname(dst)
382 if dsrc in invalid:
382 if dsrc in invalid:
383 # already seen to be uninteresting
383 # already seen to be uninteresting
384 continue
384 continue
385 elif dsrc in d1 and ddst in d1:
385 elif dsrc in d1 and ddst in d1:
386 # directory wasn't entirely moved locally
386 # directory wasn't entirely moved locally
387 invalid.add(dsrc)
387 invalid.add(dsrc)
388 elif dsrc in d2 and ddst in d2:
388 elif dsrc in d2 and ddst in d2:
389 # directory wasn't entirely moved remotely
389 # directory wasn't entirely moved remotely
390 invalid.add(dsrc)
390 invalid.add(dsrc)
391 elif dsrc in dirmove and dirmove[dsrc] != ddst:
391 elif dsrc in dirmove and dirmove[dsrc] != ddst:
392 # files from the same directory moved to two different places
392 # files from the same directory moved to two different places
393 invalid.add(dsrc)
393 invalid.add(dsrc)
394 else:
394 else:
395 # looks good so far
395 # looks good so far
396 dirmove[dsrc + "/"] = ddst + "/"
396 dirmove[dsrc + "/"] = ddst + "/"
397
397
398 for i in invalid:
398 for i in invalid:
399 if i in dirmove:
399 if i in dirmove:
400 del dirmove[i]
400 del dirmove[i]
401 del d1, d2, invalid
401 del d1, d2, invalid
402
402
403 if not dirmove:
403 if not dirmove:
404 return copy, movewithdir, diverge, renamedelete
404 return copy, movewithdir, diverge, renamedelete
405
405
406 for d in dirmove:
406 for d in dirmove:
407 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
407 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
408 (d, dirmove[d]))
408 (d, dirmove[d]))
409
409
410 # check unaccounted nonoverlapping files against directory moves
410 # check unaccounted nonoverlapping files against directory moves
411 for f in u1 + u2:
411 for f in u1 + u2:
412 if f not in fullcopy:
412 if f not in fullcopy:
413 for d in dirmove:
413 for d in dirmove:
414 if f.startswith(d):
414 if f.startswith(d):
415 # new file added in a directory that was moved, move it
415 # new file added in a directory that was moved, move it
416 df = dirmove[d] + f[len(d):]
416 df = dirmove[d] + f[len(d):]
417 if df not in copy:
417 if df not in copy:
418 movewithdir[f] = df
418 movewithdir[f] = df
419 repo.ui.debug((" pending file src: '%s' -> "
419 repo.ui.debug((" pending file src: '%s' -> "
420 "dst: '%s'\n") % (f, df))
420 "dst: '%s'\n") % (f, df))
421 break
421 break
422
422
423 return copy, movewithdir, diverge, renamedelete
423 return copy, movewithdir, diverge, renamedelete
424
424
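A hedged sketch of how a caller consumes mergecopies' four return values, assuming an existing repository; 'other' below is a placeholder for whatever revision is being merged in:

from mercurial import ui as uimod, hg, copies

repo = hg.repository(uimod.ui(), '.')
c1, c2 = repo['.'], repo['other']        # 'other' is a placeholder revision
ca = c1.ancestor(c2)                     # common ancestor changectx
copy, movewithdir, diverge, renamedelete = copies.mergecopies(repo, c1, c2, ca)
for dst, src in sorted(copy.items()):
    print("merge should treat %s as a copy of %s" % (dst, src))
for src, dsts in sorted(diverge.items()):
    print("%s was renamed divergently to %s" % (src, ", ".join(dsts)))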
425 def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
425 def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
426 """
426 """
427 check possible copies of f from m1 to m2
427 check possible copies of f from m1 to m2
428
428
429 ctx = function accepting (filename, node) that returns a filectx.
429 ctx = function accepting (filename, node) that returns a filectx.
430 f = the filename to check
430 f = the filename to check
431 m1 = the source manifest
431 m1 = the source manifest
432 m2 = the destination manifest
432 m2 = the destination manifest
433 ca = the changectx of the common ancestor
433 ca = the changectx of the common ancestor
434 limit = the rev number to not search beyond
434 limit = the rev number to not search beyond
435 diverge = record all diverges in this dict
435 diverge = record all diverges in this dict
436 copy = record all non-divergent copies in this dict
436 copy = record all non-divergent copies in this dict
437 fullcopy = record all copies in this dict
437 fullcopy = record all copies in this dict
438 """
438 """
439
439
440 ma = ca.manifest()
440 ma = ca.manifest()
441
441
442 def _related(f1, f2, limit):
442 def _related(f1, f2, limit):
443 # Walk back to common ancestor to see if the two files originate
443 # Walk back to common ancestor to see if the two files originate
444 # from the same file. Since workingfilectx's rev() is None it messes
444 # from the same file. Since workingfilectx's rev() is None it messes
445 # up the integer comparison logic, hence the pre-step check for
445 # up the integer comparison logic, hence the pre-step check for
446 # None (f1 and f2 can only be workingfilectx's initially).
446 # None (f1 and f2 can only be workingfilectx's initially).
447
447
448 if f1 == f2:
448 if f1 == f2:
449 return f1 # a match
449 return f1 # a match
450
450
451 g1, g2 = f1.ancestors(), f2.ancestors()
451 g1, g2 = f1.ancestors(), f2.ancestors()
452 try:
452 try:
453 f1r, f2r = f1.rev(), f2.rev()
453 f1r, f2r = f1.rev(), f2.rev()
454
454
455 if f1r is None:
455 if f1r is None:
456 f1 = g1.next()
456 f1 = g1.next()
457 if f2r is None:
457 if f2r is None:
458 f2 = g2.next()
458 f2 = g2.next()
459
459
460 while True:
460 while True:
461 f1r, f2r = f1.rev(), f2.rev()
461 f1r, f2r = f1.rev(), f2.rev()
462 if f1r > f2r:
462 if f1r > f2r:
463 f1 = g1.next()
463 f1 = g1.next()
464 elif f2r > f1r:
464 elif f2r > f1r:
465 f2 = g2.next()
465 f2 = g2.next()
466 elif f1 == f2:
466 elif f1 == f2:
467 return f1 # a match
467 return f1 # a match
468 elif f1r == f2r or f1r < limit or f2r < limit:
468 elif f1r == f2r or f1r < limit or f2r < limit:
469 return False # copy no longer relevant
469 return False # copy no longer relevant
470 except StopIteration:
470 except StopIteration:
471 return False
471 return False
472
472
473 of = None
473 of = None
474 seen = set([f])
474 seen = set([f])
475 for oc in ctx(f, m1[f]).ancestors():
475 for oc in ctx(f, m1[f]).ancestors():
476 ocr = oc.rev()
476 ocr = oc.rev()
477 of = oc.path()
477 of = oc.path()
478 if of in seen:
478 if of in seen:
479 # check limit late - grab last rename before
479 # check limit late - grab last rename before
480 if ocr < limit:
480 if ocr < limit:
481 break
481 break
482 continue
482 continue
483 seen.add(of)
483 seen.add(of)
484
484
485 fullcopy[f] = of # remember for dir rename detection
485 fullcopy[f] = of # remember for dir rename detection
486 if of not in m2:
486 if of not in m2:
487 continue # no match, keep looking
487 continue # no match, keep looking
488 if m2[of] == ma.get(of):
488 if m2[of] == ma.get(of):
489 break # no merge needed, quit early
489 break # no merge needed, quit early
490 c2 = ctx(of, m2[of])
490 c2 = ctx(of, m2[of])
491 cr = _related(oc, c2, ca.rev())
491 cr = _related(oc, c2, ca.rev())
492 if cr and (of == f or of == c2.path()): # non-divergent
492 if cr and (of == f or of == c2.path()): # non-divergent
493 copy[f] = of
493 copy[f] = of
494 of = None
494 of = None
495 break
495 break
496
496
497 if of in ma:
497 if of in ma:
498 diverge.setdefault(of, []).append(f)
498 diverge.setdefault(of, []).append(f)
499
499
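The diverge/copy/fullcopy arguments that checkcopies fills are plain dicts; a data-only illustration of the shape they end up with when the ancestor file 'a' was renamed to 'b' on one side and to 'c' on the other (no Mercurial calls, just the expected contents under that scenario):

diverge, copy, fullcopy = {}, {}, {}
# scanning the local side finds b <- a, scanning the other side finds c <- a
fullcopy['b'] = 'a'
fullcopy['c'] = 'a'
diverge.setdefault('a', []).append('b')
diverge.setdefault('a', []).append('c')
print(diverge)   # {'a': ['b', 'c']} -> mergecopies reports a divergent rename
print(copy)      # {} -> neither side found a straightforward copy to merge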
500 def duplicatecopies(repo, rev, fromrev, skiprev=None):
500 def duplicatecopies(repo, rev, fromrev, skiprev=None):
501 '''reproduce copies from fromrev to rev in the dirstate
501 '''reproduce copies from fromrev to rev in the dirstate
502
502
503 If skiprev is specified, it's a revision that should be used to
503 If skiprev is specified, it's a revision that should be used to
504 filter copy records. Any copies that occur between fromrev and
504 filter copy records. Any copies that occur between fromrev and
505 skiprev will not be duplicated, even if they appear in the set of
505 skiprev will not be duplicated, even if they appear in the set of
506 copies between fromrev and rev.
506 copies between fromrev and rev.
507 '''
507 '''
508 exclude = {}
508 exclude = {}
509 if skiprev is not None:
509 if skiprev is not None:
510 exclude = pathcopies(repo[fromrev], repo[skiprev])
510 exclude = pathcopies(repo[fromrev], repo[skiprev])
511 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
511 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
512 # copies.pathcopies returns backward renames, so dst might not
512 # copies.pathcopies returns backward renames, so dst might not
513 # actually be in the dirstate
513 # actually be in the dirstate
514 if dst in exclude:
514 if dst in exclude:
515 continue
515 continue
516 if repo.dirstate[dst] in "nma":
516 if repo.dirstate[dst] in "nma":
517 repo.dirstate.copy(src, dst)
517 repo.dirstate.copy(src, dst)
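A hedged sketch of the kind of call site duplicatecopies is written for: graft/rebase-style code that has replayed a change onto the working directory and wants the copy records carried into the dirstate. The revision numbers are placeholders and the repository setup is assumed:

from mercurial import ui as uimod, hg, copies

repo = hg.repository(uimod.ui(), '.')
# Reproduce in the dirstate whatever copies/renames happened between
# revision 10 and revision 12 (both placeholders), e.g. after revision 12's
# changes have just been merged into the working directory.
copies.duplicatecopies(repo, 12, 10)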