upstream/mercurial-mirror Commit - r25924:cfc24c22

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

import util, pathutil

8

from __future__ import absolute_import

9

import heapq

10

import heapq

10

11

12

from . import (

13

pathutil,

14

util,

15

)

16

11

def _findlimit(repo, a, b):

17

def _findlimit(repo, a, b):

12

"""

18

"""

13

Find the last revision that needs to be checked to ensure that a full

19

Find the last revision that needs to be checked to ensure that a full

14

transitive closure for file copies can be properly calculated.

20

transitive closure for file copies can be properly calculated.

15

Generally, this means finding the earliest revision number that's an

21

Generally, this means finding the earliest revision number that's an

16

ancestor of a or b but not both, except when a or b is a direct descendent

22

ancestor of a or b but not both, except when a or b is a direct descendent

17

of the other, in which case we can return the minimum revnum of a and b.

23

of the other, in which case we can return the minimum revnum of a and b.

18

None if no such revision exists.

24

None if no such revision exists.

19

"""

25

"""

20

26

21

# basic idea:

27

# basic idea:

22

# - mark a and b with different sides

28

# - mark a and b with different sides

23

# - if a parent's children are all on the same side, the parent is

29

# - if a parent's children are all on the same side, the parent is

24

# on that side, otherwise it is on no side

30

# on that side, otherwise it is on no side

25

# - walk the graph in topological order with the help of a heap;

31

# - walk the graph in topological order with the help of a heap;

26

# - add unseen parents to side map

32

# - add unseen parents to side map

27

# - clear side of any parent that has children on different sides

33

# - clear side of any parent that has children on different sides

28

# - track number of interesting revs that might still be on a side

34

# - track number of interesting revs that might still be on a side

29

# - track the lowest interesting rev seen

35

# - track the lowest interesting rev seen

30

# - quit when interesting revs is zero

36

# - quit when interesting revs is zero

31

37

32

cl = repo.changelog

38

cl = repo.changelog

33

working = len(cl) # pseudo rev for the working directory

39

working = len(cl) # pseudo rev for the working directory

34

if a is None:

40

if a is None:

35

a = working

41

a = working

36

if b is None:

42

if b is None:

37

b = working

43

b = working

38

44

39

side = {a: -1, b: 1}

45

side = {a: -1, b: 1}

40

visit = [-a, -b]

46

visit = [-a, -b]

41

heapq.heapify(visit)

47

heapq.heapify(visit)

42

interesting = len(visit)

48

interesting = len(visit)

43

hascommonancestor = False

49

hascommonancestor = False

44

limit = working

50

limit = working

45

51

46

while interesting:

52

while interesting:

47

r = -heapq.heappop(visit)

53

r = -heapq.heappop(visit)

48

if r == working:

54

if r == working:

49

parents = [cl.rev(p) for p in repo.dirstate.parents()]

55

parents = [cl.rev(p) for p in repo.dirstate.parents()]

50

else:

56

else:

51

parents = cl.parentrevs(r)

57

parents = cl.parentrevs(r)

52

for p in parents:

58

for p in parents:

53

if p < 0:

59

if p < 0:

54

continue

60

continue

55

if p not in side:

61

if p not in side:

56

# first time we see p; add it to visit

62

# first time we see p; add it to visit

57

side[p] = side[r]

63

side[p] = side[r]

58

if side[p]:

64

if side[p]:

59

interesting += 1

65

interesting += 1

60

heapq.heappush(visit, -p)

66

heapq.heappush(visit, -p)

61

elif side[p] and side[p] != side[r]:

67

elif side[p] and side[p] != side[r]:

62

# p was interesting but now we know better

68

# p was interesting but now we know better

63

side[p] = 0

69

side[p] = 0

64

interesting -= 1

70

interesting -= 1

65

hascommonancestor = True

71

hascommonancestor = True

66

if side[r]:

72

if side[r]:

67

limit = r # lowest rev visited

73

limit = r # lowest rev visited

68

interesting -= 1

74

interesting -= 1

69

75

70

if not hascommonancestor:

76

if not hascommonancestor:

71

return None

77

return None

72

78

73

# Consider the following flow (see test-commit-amend.t under issue4405):

79

# Consider the following flow (see test-commit-amend.t under issue4405):

74

# 1/ File 'a0' committed

80

# 1/ File 'a0' committed

75

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

81

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

76

# 3/ Move back to first commit

82

# 3/ Move back to first commit

77

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

83

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

78

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

84

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

79

#

85

#

80

# During the amend in step five, we will be in this state:

86

# During the amend in step five, we will be in this state:

81

#

87

#

82

# @ 3 temporary amend commit for a1-amend

88

# @ 3 temporary amend commit for a1-amend

83

# |

89

# |

84

# o 2 a1-amend

90

# o 2 a1-amend

85

# |

91

# |

86

# | o 1 a1

92

# | o 1 a1

87

# |/

93

# |/

88

# o 0 a0

94

# o 0 a0

89

#

95

#

90

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

96

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

91

# yet the filelog has the copy information in rev 1 and we will not look

97

# yet the filelog has the copy information in rev 1 and we will not look

92

# back far enough unless we also look at the a and b as candidates.

98

# back far enough unless we also look at the a and b as candidates.

93

# This only occurs when a is a descendent of b or visa-versa.

99

# This only occurs when a is a descendent of b or visa-versa.

94

return min(limit, a, b)

100

return min(limit, a, b)

95

101

96

def _chain(src, dst, a, b):

102

def _chain(src, dst, a, b):

97

'''chain two sets of copies a->b'''

103

'''chain two sets of copies a->b'''

98

t = a.copy()

104

t = a.copy()

99

for k, v in b.iteritems():

105

for k, v in b.iteritems():

100

if v in t:

106

if v in t:

101

# found a chain

107

# found a chain

102

if t[v] != k:

108

if t[v] != k:

103

# file wasn't renamed back to itself

109

# file wasn't renamed back to itself

104

t[k] = t[v]

110

t[k] = t[v]

105

if v not in dst:

111

if v not in dst:

106

# chain was a rename, not a copy

112

# chain was a rename, not a copy

107

del t[v]

113

del t[v]

108

if v in src:

114

if v in src:

109

# file is a copy of an existing file

115

# file is a copy of an existing file

110

t[k] = v

116

t[k] = v

111

117

112

# remove criss-crossed copies

118

# remove criss-crossed copies

113

for k, v in t.items():

119

for k, v in t.items():

114

if k in src and v in dst:

120

if k in src and v in dst:

115

del t[k]

121

del t[k]

116

122

117

return t

123

return t

118

124

119

def _tracefile(fctx, am, limit=-1):

125

def _tracefile(fctx, am, limit=-1):

120

'''return file context that is the ancestor of fctx present in ancestor

126

'''return file context that is the ancestor of fctx present in ancestor

121

manifest am, stopping after the first ancestor lower than limit'''

127

manifest am, stopping after the first ancestor lower than limit'''

122

128

123

for f in fctx.ancestors():

129

for f in fctx.ancestors():

124

if am.get(f.path(), None) == f.filenode():

130

if am.get(f.path(), None) == f.filenode():

125

return f

131

return f

126

if limit >= 0 and f.linkrev() < limit and f.rev() < limit:

132

if limit >= 0 and f.linkrev() < limit and f.rev() < limit:

127

return None

133

return None

128

134

129

def _dirstatecopies(d):

135

def _dirstatecopies(d):

130

ds = d._repo.dirstate

136

ds = d._repo.dirstate

131

c = ds.copies().copy()

137

c = ds.copies().copy()

132

for k in c.keys():

138

for k in c.keys():

133

if ds[k] not in 'anm':

139

if ds[k] not in 'anm':

134

del c[k]

140

del c[k]

135

return c

141

return c

136

142

137

def _computeforwardmissing(a, b, match=None):

143

def _computeforwardmissing(a, b, match=None):

138

"""Computes which files are in b but not a.

144

"""Computes which files are in b but not a.

139

This is its own function so extensions can easily wrap this call to see what

145

This is its own function so extensions can easily wrap this call to see what

140

files _forwardcopies is about to process.

146

files _forwardcopies is about to process.

141

"""

147

"""

142

ma = a.manifest()

148

ma = a.manifest()

143

mb = b.manifest()

149

mb = b.manifest()

144

if match:

150

if match:

145

ma = ma.matches(match)

151

ma = ma.matches(match)

146

mb = mb.matches(match)

152

mb = mb.matches(match)

147

return mb.filesnotin(ma)

153

return mb.filesnotin(ma)

148

154

149

def _forwardcopies(a, b, match=None):

155

def _forwardcopies(a, b, match=None):

150

'''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

156

'''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

151

157

152

# check for working copy

158

# check for working copy

153

w = None

159

w = None

154

if b.rev() is None:

160

if b.rev() is None:

155

w = b

161

w = b

156

b = w.p1()

162

b = w.p1()

157

if a == b:

163

if a == b:

158

# short-circuit to avoid issues with merge states

164

# short-circuit to avoid issues with merge states

159

return _dirstatecopies(w)

165

return _dirstatecopies(w)

160

166

161

# files might have to be traced back to the fctx parent of the last

167

# files might have to be traced back to the fctx parent of the last

162

# one-side-only changeset, but not further back than that

168

# one-side-only changeset, but not further back than that

163

limit = _findlimit(a._repo, a.rev(), b.rev())

169

limit = _findlimit(a._repo, a.rev(), b.rev())

164

if limit is None:

170

if limit is None:

165

limit = -1

171

limit = -1

166

am = a.manifest()

172

am = a.manifest()

167

173

168

# find where new files came from

174

# find where new files came from

169

# we currently don't try to find where old files went, too expensive

175

# we currently don't try to find where old files went, too expensive

170

# this means we can miss a case like 'hg rm b; hg cp a b'

176

# this means we can miss a case like 'hg rm b; hg cp a b'

171

cm = {}

177

cm = {}

172

missing = _computeforwardmissing(a, b, match=match)

178

missing = _computeforwardmissing(a, b, match=match)

173

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

179

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

174

for f in missing:

180

for f in missing:

175

fctx = b[f]

181

fctx = b[f]

176

fctx._ancestrycontext = ancestrycontext

182

fctx._ancestrycontext = ancestrycontext

177

ofctx = _tracefile(fctx, am, limit)

183

ofctx = _tracefile(fctx, am, limit)

178

if ofctx:

184

if ofctx:

179

cm[f] = ofctx.path()

185

cm[f] = ofctx.path()

180

186

181

# combine copies from dirstate if necessary

187

# combine copies from dirstate if necessary

182

if w is not None:

188

if w is not None:

183

cm = _chain(a, w, cm, _dirstatecopies(w))

189

cm = _chain(a, w, cm, _dirstatecopies(w))

184

190

185

return cm

191

return cm

186

192

187

def _backwardrenames(a, b):

193

def _backwardrenames(a, b):

188

# Even though we're not taking copies into account, 1:n rename situations

194

# Even though we're not taking copies into account, 1:n rename situations

189

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

195

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

190

# arbitrarily pick one of the renames.

196

# arbitrarily pick one of the renames.

191

f = _forwardcopies(b, a)

197

f = _forwardcopies(b, a)

192

r = {}

198

r = {}

193

for k, v in sorted(f.iteritems()):

199

for k, v in sorted(f.iteritems()):

194

# remove copies

200

# remove copies

195

if v in a:

201

if v in a:

196

continue

202

continue

197

r[v] = k

203

r[v] = k

198

return r

204

return r

199

205

200

def pathcopies(x, y, match=None):

206

def pathcopies(x, y, match=None):

201

'''find {dst@y: src@x} copy mapping for directed compare'''

207

'''find {dst@y: src@x} copy mapping for directed compare'''

202

if x == y or not x or not y:

208

if x == y or not x or not y:

203

return {}

209

return {}

204

a = y.ancestor(x)

210

a = y.ancestor(x)

205

if a == x:

211

if a == x:

206

return _forwardcopies(x, y, match=match)

212

return _forwardcopies(x, y, match=match)

207

if a == y:

213

if a == y:

208

return _backwardrenames(x, y)

214

return _backwardrenames(x, y)

209

return _chain(x, y, _backwardrenames(x, a),

215

return _chain(x, y, _backwardrenames(x, a),

210

_forwardcopies(a, y, match=match))

216

_forwardcopies(a, y, match=match))

211

217

212

def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):

218

def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):

213

"""Computes, based on addedinm1 and addedinm2, the files exclusive to c1

219

"""Computes, based on addedinm1 and addedinm2, the files exclusive to c1

214

and c2. This is its own function so extensions can easily wrap this call

220

and c2. This is its own function so extensions can easily wrap this call

215

to see what files mergecopies is about to process.

221

to see what files mergecopies is about to process.

216

222

217

Even though c1 and c2 are not used in this function, they are useful in

223

Even though c1 and c2 are not used in this function, they are useful in

218

other extensions for being able to read the file nodes of the changed files.

224

other extensions for being able to read the file nodes of the changed files.

219

"""

225

"""

220

u1 = sorted(addedinm1 - addedinm2)

226

u1 = sorted(addedinm1 - addedinm2)

221

u2 = sorted(addedinm2 - addedinm1)

227

u2 = sorted(addedinm2 - addedinm1)

222

228

223

if u1:

229

if u1:

224

repo.ui.debug(" unmatched files in local:\n %s\n"

230

repo.ui.debug(" unmatched files in local:\n %s\n"

225

% "\n ".join(u1))

231

% "\n ".join(u1))

226

if u2:

232

if u2:

227

repo.ui.debug(" unmatched files in other:\n %s\n"

233

repo.ui.debug(" unmatched files in other:\n %s\n"

228

% "\n ".join(u2))

234

% "\n ".join(u2))

229

return u1, u2

235

return u1, u2

230

236

231

def mergecopies(repo, c1, c2, ca):

237

def mergecopies(repo, c1, c2, ca):

232

"""

238

"""

233

Find moves and copies between context c1 and c2 that are relevant

239

Find moves and copies between context c1 and c2 that are relevant

234

for merging.

240

for merging.

235

241

236

Returns four dicts: "copy", "movewithdir", "diverge", and

242

Returns four dicts: "copy", "movewithdir", "diverge", and

237

"renamedelete".

243

"renamedelete".

238

244

239

"copy" is a mapping from destination name -> source name,

245

"copy" is a mapping from destination name -> source name,

240

where source is in c1 and destination is in c2 or vice-versa.

246

where source is in c1 and destination is in c2 or vice-versa.

241

247

242

"movewithdir" is a mapping from source name -> destination name,

248

"movewithdir" is a mapping from source name -> destination name,

243

where the file at source present in one context but not the other

249

where the file at source present in one context but not the other

244

needs to be moved to destination by the merge process, because the

250

needs to be moved to destination by the merge process, because the

245

other context moved the directory it is in.

251

other context moved the directory it is in.

246

252

247

"diverge" is a mapping of source name -> list of destination names

253

"diverge" is a mapping of source name -> list of destination names

248

for divergent renames.

254

for divergent renames.

249

255

250

"renamedelete" is a mapping of source name -> list of destination

256

"renamedelete" is a mapping of source name -> list of destination

251

names for files deleted in c1 that were renamed in c2 or vice-versa.

257

names for files deleted in c1 that were renamed in c2 or vice-versa.

252

"""

258

"""

253

# avoid silly behavior for update from empty dir

259

# avoid silly behavior for update from empty dir

254

if not c1 or not c2 or c1 == c2:

260

if not c1 or not c2 or c1 == c2:

255

return {}, {}, {}, {}

261

return {}, {}, {}, {}

256

262

257

# avoid silly behavior for parent -> working dir

263

# avoid silly behavior for parent -> working dir

258

if c2.node() is None and c1.node() == repo.dirstate.p1():

264

if c2.node() is None and c1.node() == repo.dirstate.p1():

259

return repo.dirstate.copies(), {}, {}, {}

265

return repo.dirstate.copies(), {}, {}, {}

260

266

261

limit = _findlimit(repo, c1.rev(), c2.rev())

267

limit = _findlimit(repo, c1.rev(), c2.rev())

262

if limit is None:

268

if limit is None:

263

# no common ancestor, no copies

269

# no common ancestor, no copies

264

return {}, {}, {}, {}

270

return {}, {}, {}, {}

265

m1 = c1.manifest()

271

m1 = c1.manifest()

266

m2 = c2.manifest()

272

m2 = c2.manifest()

267

ma = ca.manifest()

273

ma = ca.manifest()

268

274

269

275

270

def setupctx(ctx):

276

def setupctx(ctx):

271

"""return a 'makectx' function suitable for checkcopies usage from ctx

277

"""return a 'makectx' function suitable for checkcopies usage from ctx

272

278

273

We have to re-setup the function building 'filectx' for each

279

We have to re-setup the function building 'filectx' for each

274

'checkcopies' to ensure the linkrev adjustement is properly setup for

280

'checkcopies' to ensure the linkrev adjustement is properly setup for

275

each. Linkrev adjustment is important to avoid bug in rename

281

each. Linkrev adjustment is important to avoid bug in rename

276

detection. Moreover, having a proper '_ancestrycontext' setup ensures

282

detection. Moreover, having a proper '_ancestrycontext' setup ensures

277

the performance impact of this adjustment is kept limited. Without it,

283

the performance impact of this adjustment is kept limited. Without it,

278

each file could do a full dag traversal making the time complexity of

284

each file could do a full dag traversal making the time complexity of

279

the operation explode (see issue4537).

285

the operation explode (see issue4537).

280

286

281

This function exists here mostly to limit the impact on stable. Feel

287

This function exists here mostly to limit the impact on stable. Feel

282

free to refactor on default.

288

free to refactor on default.

283

"""

289

"""

284

rev = ctx.rev()

290

rev = ctx.rev()

285

ac = getattr(ctx, '_ancestrycontext', None)

291

ac = getattr(ctx, '_ancestrycontext', None)

286

if ac is None:

292

if ac is None:

287

revs = [rev]

293

revs = [rev]

288

if rev is None:

294

if rev is None:

289

revs = [p.rev() for p in ctx.parents()]

295

revs = [p.rev() for p in ctx.parents()]

290

ac = ctx._repo.changelog.ancestors(revs, inclusive=True)

296

ac = ctx._repo.changelog.ancestors(revs, inclusive=True)

291

ctx._ancestrycontext = ac

297

ctx._ancestrycontext = ac

292

def makectx(f, n):

298

def makectx(f, n):

293

if len(n) != 20: # in a working context?

299

if len(n) != 20: # in a working context?

294

if c1.rev() is None:

300

if c1.rev() is None:

295

return c1.filectx(f)

301

return c1.filectx(f)

296

return c2.filectx(f)

302

return c2.filectx(f)

297

fctx = repo.filectx(f, fileid=n)

303

fctx = repo.filectx(f, fileid=n)

298

# setup only needed for filectx not create from a changectx

304

# setup only needed for filectx not create from a changectx

299

fctx._ancestrycontext = ac

305

fctx._ancestrycontext = ac

300

fctx._descendantrev = rev

306

fctx._descendantrev = rev

301

return fctx

307

return fctx

302

return util.lrucachefunc(makectx)

308

return util.lrucachefunc(makectx)

303

309

304

copy = {}

310

copy = {}

305

movewithdir = {}

311

movewithdir = {}

306

fullcopy = {}

312

fullcopy = {}

307

diverge = {}

313

diverge = {}

308

314

309

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

315

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

310

316

311

addedinm1 = m1.filesnotin(ma)

317

addedinm1 = m1.filesnotin(ma)

312

addedinm2 = m2.filesnotin(ma)

318

addedinm2 = m2.filesnotin(ma)

313

u1, u2 = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)

319

u1, u2 = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)

314

320

315

for f in u1:

321

for f in u1:

316

ctx = setupctx(c1)

322

ctx = setupctx(c1)

317

checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)

323

checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)

318

324

319

for f in u2:

325

for f in u2:

320

ctx = setupctx(c2)

326

ctx = setupctx(c2)

321

checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)

327

checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)

322

328

323

renamedelete = {}

329

renamedelete = {}

324

renamedelete2 = set()

330

renamedelete2 = set()

325

diverge2 = set()

331

diverge2 = set()

326

for of, fl in diverge.items():

332

for of, fl in diverge.items():

327

if len(fl) == 1 or of in c1 or of in c2:

333

if len(fl) == 1 or of in c1 or of in c2:

328

del diverge[of] # not actually divergent, or not a rename

334

del diverge[of] # not actually divergent, or not a rename

329

if of not in c1 and of not in c2:

335

if of not in c1 and of not in c2:

330

# renamed on one side, deleted on the other side, but filter

336

# renamed on one side, deleted on the other side, but filter

331

# out files that have been renamed and then deleted

337

# out files that have been renamed and then deleted

332

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

338

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

333

renamedelete2.update(fl) # reverse map for below

339

renamedelete2.update(fl) # reverse map for below

334

else:

340

else:

335

diverge2.update(fl) # reverse map for below

341

diverge2.update(fl) # reverse map for below

336

342

337

bothnew = sorted(addedinm1 & addedinm2)

343

bothnew = sorted(addedinm1 & addedinm2)

338

if bothnew:

344

if bothnew:

339

repo.ui.debug(" unmatched files new in both:\n %s\n"

345

repo.ui.debug(" unmatched files new in both:\n %s\n"

340

% "\n ".join(bothnew))

346

% "\n ".join(bothnew))

341

bothdiverge, _copy, _fullcopy = {}, {}, {}

347

bothdiverge, _copy, _fullcopy = {}, {}, {}

342

for f in bothnew:

348

for f in bothnew:

343

ctx = setupctx(c1)

349

ctx = setupctx(c1)

344

checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)

350

checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)

345

ctx = setupctx(c2)

351

ctx = setupctx(c2)

346

checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)

352

checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)

347

for of, fl in bothdiverge.items():

353

for of, fl in bothdiverge.items():

348

if len(fl) == 2 and fl[0] == fl[1]:

354

if len(fl) == 2 and fl[0] == fl[1]:

349

copy[fl[0]] = of # not actually divergent, just matching renames

355

copy[fl[0]] = of # not actually divergent, just matching renames

350

356

351

if fullcopy and repo.ui.debugflag:

357

if fullcopy and repo.ui.debugflag:

352

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

358

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

353

"% = renamed and deleted):\n")

359

"% = renamed and deleted):\n")

354

for f in sorted(fullcopy):

360

for f in sorted(fullcopy):

355

note = ""

361

note = ""

356

if f in copy:

362

if f in copy:

357

note += "*"

363

note += "*"

358

if f in diverge2:

364

if f in diverge2:

359

note += "!"

365

note += "!"

360

if f in renamedelete2:

366

if f in renamedelete2:

361

note += "%"

367

note += "%"

362

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

368

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

363

note))

369

note))

364

del diverge2

370

del diverge2

365

371

366

if not fullcopy:

372

if not fullcopy:

367

return copy, movewithdir, diverge, renamedelete

373

return copy, movewithdir, diverge, renamedelete

368

374

369

repo.ui.debug(" checking for directory renames\n")

375

repo.ui.debug(" checking for directory renames\n")

370

376

371

# generate a directory move map

377

# generate a directory move map

372

d1, d2 = c1.dirs(), c2.dirs()

378

d1, d2 = c1.dirs(), c2.dirs()

373

# Hack for adding '', which is not otherwise added, to d1 and d2

379

# Hack for adding '', which is not otherwise added, to d1 and d2

374

d1.addpath('/')

380

d1.addpath('/')

375

d2.addpath('/')

381

d2.addpath('/')

376

invalid = set()

382

invalid = set()

377

dirmove = {}

383

dirmove = {}

378

384

379

# examine each file copy for a potential directory move, which is

385

# examine each file copy for a potential directory move, which is

380

# when all the files in a directory are moved to a new directory

386

# when all the files in a directory are moved to a new directory

381

for dst, src in fullcopy.iteritems():

387

for dst, src in fullcopy.iteritems():

382

dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)

388

dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)

383

if dsrc in invalid:

389

if dsrc in invalid:

384

# already seen to be uninteresting

390

# already seen to be uninteresting

385

continue

391

continue

386

elif dsrc in d1 and ddst in d1:

392

elif dsrc in d1 and ddst in d1:

387

# directory wasn't entirely moved locally

393

# directory wasn't entirely moved locally

388

invalid.add(dsrc)

394

invalid.add(dsrc)

389

elif dsrc in d2 and ddst in d2:

395

elif dsrc in d2 and ddst in d2:

390

# directory wasn't entirely moved remotely

396

# directory wasn't entirely moved remotely

391

invalid.add(dsrc)

397

invalid.add(dsrc)

392

elif dsrc in dirmove and dirmove[dsrc] != ddst:

398

elif dsrc in dirmove and dirmove[dsrc] != ddst:

393

# files from the same directory moved to two different places

399

# files from the same directory moved to two different places

394

invalid.add(dsrc)

400

invalid.add(dsrc)

395

else:

401

else:

396

# looks good so far

402

# looks good so far

397

dirmove[dsrc + "/"] = ddst + "/"

403

dirmove[dsrc + "/"] = ddst + "/"

398

404

399

for i in invalid:

405

for i in invalid:

400

if i in dirmove:

406

if i in dirmove:

401

del dirmove[i]

407

del dirmove[i]

402

del d1, d2, invalid

408

del d1, d2, invalid

403

409

404

if not dirmove:

410

if not dirmove:

405

return copy, movewithdir, diverge, renamedelete

411

return copy, movewithdir, diverge, renamedelete

406

412

407

for d in dirmove:

413

for d in dirmove:

408

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

414

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

409

(d, dirmove[d]))

415

(d, dirmove[d]))

410

416

411

# check unaccounted nonoverlapping files against directory moves

417

# check unaccounted nonoverlapping files against directory moves

412

for f in u1 + u2:

418

for f in u1 + u2:

413

if f not in fullcopy:

419

if f not in fullcopy:

414

for d in dirmove:

420

for d in dirmove:

415

if f.startswith(d):

421

if f.startswith(d):

416

# new file added in a directory that was moved, move it

422

# new file added in a directory that was moved, move it

417

df = dirmove[d] + f[len(d):]

423

df = dirmove[d] + f[len(d):]

418

if df not in copy:

424

if df not in copy:

419

movewithdir[f] = df

425

movewithdir[f] = df

420

repo.ui.debug((" pending file src: '%s' -> "

426

repo.ui.debug((" pending file src: '%s' -> "

421

"dst: '%s'\n") % (f, df))

427

"dst: '%s'\n") % (f, df))

422

break

428

break

423

429

424

return copy, movewithdir, diverge, renamedelete

430

return copy, movewithdir, diverge, renamedelete

425

431

426

def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):

432

def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):

427

"""

433

"""

428

check possible copies of f from m1 to m2

434

check possible copies of f from m1 to m2

429

435

430

ctx = function accepting (filename, node) that returns a filectx.

436

ctx = function accepting (filename, node) that returns a filectx.

431

f = the filename to check

437

f = the filename to check

432

m1 = the source manifest

438

m1 = the source manifest

433

m2 = the destination manifest

439

m2 = the destination manifest

434

ca = the changectx of the common ancestor

440

ca = the changectx of the common ancestor

435

limit = the rev number to not search beyond

441

limit = the rev number to not search beyond

436

diverge = record all diverges in this dict

442

diverge = record all diverges in this dict

437

copy = record all non-divergent copies in this dict

443

copy = record all non-divergent copies in this dict

438

fullcopy = record all copies in this dict

444

fullcopy = record all copies in this dict

439

"""

445

"""

440

446

441

ma = ca.manifest()

447

ma = ca.manifest()

442

448

443

def _related(f1, f2, limit):

449

def _related(f1, f2, limit):

444

# Walk back to common ancestor to see if the two files originate

450

# Walk back to common ancestor to see if the two files originate

445

# from the same file. Since workingfilectx's rev() is None it messes

451

# from the same file. Since workingfilectx's rev() is None it messes

446

# up the integer comparison logic, hence the pre-step check for

452

# up the integer comparison logic, hence the pre-step check for

447

# None (f1 and f2 can only be workingfilectx's initially).

453

# None (f1 and f2 can only be workingfilectx's initially).

448

454

449

if f1 == f2:

455

if f1 == f2:

450

return f1 # a match

456

return f1 # a match

451

457

452

g1, g2 = f1.ancestors(), f2.ancestors()

458

g1, g2 = f1.ancestors(), f2.ancestors()

453

try:

459

try:

454

f1r, f2r = f1.linkrev(), f2.linkrev()

460

f1r, f2r = f1.linkrev(), f2.linkrev()

455

461

456

if f1r is None:

462

if f1r is None:

457

f1 = g1.next()

463

f1 = g1.next()

458

if f2r is None:

464

if f2r is None:

459

f2 = g2.next()

465

f2 = g2.next()

460

466

461

while True:

467

while True:

462

f1r, f2r = f1.linkrev(), f2.linkrev()

468

f1r, f2r = f1.linkrev(), f2.linkrev()

463

if f1r > f2r:

469

if f1r > f2r:

464

f1 = g1.next()

470

f1 = g1.next()

465

elif f2r > f1r:

471

elif f2r > f1r:

466

f2 = g2.next()

472

f2 = g2.next()

467

elif f1 == f2:

473

elif f1 == f2:

468

return f1 # a match

474

return f1 # a match

469

elif f1r == f2r or f1r < limit or f2r < limit:

475

elif f1r == f2r or f1r < limit or f2r < limit:

470

return False # copy no longer relevant

476

return False # copy no longer relevant

471

except StopIteration:

477

except StopIteration:

472

return False

478

return False

473

479

474

of = None

480

of = None

475

seen = set([f])

481

seen = set([f])

476

for oc in ctx(f, m1[f]).ancestors():

482

for oc in ctx(f, m1[f]).ancestors():

477

ocr = oc.linkrev()

483

ocr = oc.linkrev()

478

of = oc.path()

484

of = oc.path()

479

if of in seen:

485

if of in seen:

480

# check limit late - grab last rename before

486

# check limit late - grab last rename before

481

if ocr < limit:

487

if ocr < limit:

482

break

488

break

483

continue

489

continue

484

seen.add(of)

490

seen.add(of)

485

491

486

fullcopy[f] = of # remember for dir rename detection

492

fullcopy[f] = of # remember for dir rename detection

487

if of not in m2:

493

if of not in m2:

488

continue # no match, keep looking

494

continue # no match, keep looking

489

if m2[of] == ma.get(of):

495

if m2[of] == ma.get(of):

490

break # no merge needed, quit early

496

break # no merge needed, quit early

491

c2 = ctx(of, m2[of])

497

c2 = ctx(of, m2[of])

492

cr = _related(oc, c2, ca.rev())

498

cr = _related(oc, c2, ca.rev())

493

if cr and (of == f or of == c2.path()): # non-divergent

499

if cr and (of == f or of == c2.path()): # non-divergent

494

copy[f] = of

500

copy[f] = of

495

of = None

501

of = None

496

break

502

break

497

503

498

if of in ma:

504

if of in ma:

499

diverge.setdefault(of, []).append(f)

505

diverge.setdefault(of, []).append(f)

500

506

501

def duplicatecopies(repo, rev, fromrev, skiprev=None):

507

def duplicatecopies(repo, rev, fromrev, skiprev=None):

502

'''reproduce copies from fromrev to rev in the dirstate

508

'''reproduce copies from fromrev to rev in the dirstate

503

509

504

If skiprev is specified, it's a revision that should be used to

510

If skiprev is specified, it's a revision that should be used to

505

filter copy records. Any copies that occur between fromrev and

511

filter copy records. Any copies that occur between fromrev and

506

skiprev will not be duplicated, even if they appear in the set of

512

skiprev will not be duplicated, even if they appear in the set of

507

copies between fromrev and rev.

513

copies between fromrev and rev.

508

'''

514

'''

509

exclude = {}

515

exclude = {}

510

if skiprev is not None:

516

if skiprev is not None:

511

exclude = pathcopies(repo[fromrev], repo[skiprev])

517

exclude = pathcopies(repo[fromrev], repo[skiprev])

512

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

518

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

513

# copies.pathcopies returns backward renames, so dst might not

519

# copies.pathcopies returns backward renames, so dst might not

514

# actually be in the dirstate

520

# actually be in the dirstate

515

if dst in exclude:

521

if dst in exclude:

516

continue

522

continue

517

if repo.dirstate[dst] in "nma":

523

if repo.dirstate[dst] in "nma":

518

repo.dirstate.copy(src, dst)

524

repo.dirstate.copy(src, dst)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
-            import util, pathutil
+            from __future__ import absolute_import
             import heapq
+            from . import (
+                pathutil,
+                util,
+            )
             def _findlimit(repo, a, b):
                 """
                 Find the last revision that needs to be checked to ensure that a full
                 transitive closure for file copies can be properly calculated.
                 Generally, this means finding the earliest revision number that's an
                 ancestor of a or b but not both, except when a or b is a direct descendent
                 of the other, in which case we can return the minimum revnum of a and b.
                 None if no such revision exists.
                 """
                 # basic idea:
                 # - mark a and b with different sides
                 # - if a parent's children are all on the same side, the parent is
                 #   on that side, otherwise it is on no side
                 # - walk the graph in topological order with the help of a heap;
                 #   - add unseen parents to side map
                 #   - clear side of any parent that has children on different sides
                 #   - track number of interesting revs that might still be on a side
                 #   - track the lowest interesting rev seen
                 #   - quit when interesting revs is zero
                 cl = repo.changelog
                 working = len(cl) # pseudo rev for the working directory
                 if a is None:
                     a = working
                 if b is None:
                     b = working
                 side = {a: -1, b: 1}
                 visit = [-a, -b]
                 heapq.heapify(visit)
                 interesting = len(visit)
                 hascommonancestor = False
                 limit = working
                 while interesting:
                     r = -heapq.heappop(visit)
                     if r == working:
                         parents = [cl.rev(p) for p in repo.dirstate.parents()]
                     else:
                         parents = cl.parentrevs(r)
                     for p in parents:
                         if p < 0:
                             continue
                         if p not in side:
                             # first time we see p; add it to visit
                             side[p] = side[r]
                             if side[p]:
                                 interesting += 1
                             heapq.heappush(visit, -p)
                         elif side[p] and side[p] != side[r]:
                             # p was interesting but now we know better
                             side[p] = 0
                             interesting -= 1
                             hascommonancestor = True
                     if side[r]:
                         limit = r # lowest rev visited
                         interesting -= 1
                 if not hascommonancestor:
                     return None
                 # Consider the following flow (see test-commit-amend.t under issue4405):
                 # 1/ File 'a0' committed
                 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
                 # 3/ Move back to first commit
                 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
                 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
                 #
                 # During the amend in step five, we will be in this state:
                 #
                 # @  3 temporary amend commit for a1-amend
                 # |
                 # o  2 a1-amend
                 # |
                 # | o  1 a1
                 # |/
                 # o  0 a0
                 #
                 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
                 # yet the filelog has the copy information in rev 1 and we will not look
                 # back far enough unless we also look at the a and b as candidates.
                 # This only occurs when a is a descendent of b or visa-versa.
                 return min(limit, a, b)
             def _chain(src, dst, a, b):
                 '''chain two sets of copies a->b'''
                 t = a.copy()
                 for k, v in b.iteritems():
                     if v in t:
                         # found a chain
                         if t[v] != k:
                             # file wasn't renamed back to itself
                             t[k] = t[v]
                         if v not in dst:
                             # chain was a rename, not a copy
                             del t[v]
                     if v in src:
                         # file is a copy of an existing file
                         t[k] = v
                 # remove criss-crossed copies
                 for k, v in t.items():
                     if k in src and v in dst:
                         del t[k]
                 return t
             def _tracefile(fctx, am, limit=-1):
                 '''return file context that is the ancestor of fctx present in ancestor
                 manifest am, stopping after the first ancestor lower than limit'''
                 for f in fctx.ancestors():
                     if am.get(f.path(), None) == f.filenode():
                         return f
                     if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
                         return None
             def _dirstatecopies(d):
                 ds = d._repo.dirstate
                 c = ds.copies().copy()
                 for k in c.keys():
                     if ds[k] not in 'anm':
                         del c[k]
                 return c
             def _computeforwardmissing(a, b, match=None):
                 """Computes which files are in b but not a.
                 This is its own function so extensions can easily wrap this call to see what
                 files _forwardcopies is about to process.
                 """
                 ma = a.manifest()
                 mb = b.manifest()
                 if match:
                     ma = ma.matches(match)
                     mb = mb.matches(match)
                 return mb.filesnotin(ma)
             def _forwardcopies(a, b, match=None):
                 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
                 # check for working copy
                 w = None
                 if b.rev() is None:
                     w = b
                     b = w.p1()
                     if a == b:
                         # short-circuit to avoid issues with merge states
                         return _dirstatecopies(w)
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 limit = _findlimit(a._repo, a.rev(), b.rev())
                 if limit is None:
                     limit = -1
                 am = a.manifest()
                 # find where new files came from
                 # we currently don't try to find where old files went, too expensive
                 # this means we can miss a case like 'hg rm b; hg cp a b'
                 cm = {}
                 missing = _computeforwardmissing(a, b, match=match)
                 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
                 for f in missing:
                     fctx = b[f]
                     fctx._ancestrycontext = ancestrycontext
                     ofctx = _tracefile(fctx, am, limit)
                     if ofctx:
                         cm[f] = ofctx.path()
                 # combine copies from dirstate if necessary
                 if w is not None:
                     cm = _chain(a, w, cm, _dirstatecopies(w))
                 return cm
             def _backwardrenames(a, b):
                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
                 f = _forwardcopies(b, a)
                 r = {}
                 for k, v in sorted(f.iteritems()):
                     # remove copies
                     if v in a:
                         continue
                     r[v] = k
                 return r
             def pathcopies(x, y, match=None):
                 '''find {dst@y: src@x} copy mapping for directed compare'''
                 if x == y or not x or not y:
                     return {}
                 a = y.ancestor(x)
                 if a == x:
                     return _forwardcopies(x, y, match=match)
                 if a == y:
                     return _backwardrenames(x, y)
                 return _chain(x, y, _backwardrenames(x, a),
                               _forwardcopies(a, y, match=match))
             def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):
                 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
                 and c2. This is its own function so extensions can easily wrap this call
                 to see what files mergecopies is about to process.
                 Even though c1 and c2 are not used in this function, they are useful in
                 other extensions for being able to read the file nodes of the changed files.
                 """
                 u1 = sorted(addedinm1 - addedinm2)
                 u2 = sorted(addedinm2 - addedinm1)
                 if u1:
                     repo.ui.debug("  unmatched files in local:\n   %s\n"
                                   % "\n   ".join(u1))
                 if u2:
                     repo.ui.debug("  unmatched files in other:\n   %s\n"
                                   % "\n   ".join(u2))
                 return u1, u2
             def mergecopies(repo, c1, c2, ca):
                 """
                 Find moves and copies between context c1 and c2 that are relevant
                 for merging.
                 Returns four dicts: "copy", "movewithdir", "diverge", and
                 "renamedelete".
                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.
                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.
                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.
                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return {}, {}, {}, {}
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return repo.dirstate.copies(), {}, {}, {}
                 limit = _findlimit(repo, c1.rev(), c2.rev())
                 if limit is None:
                     # no common ancestor, no copies
                     return {}, {}, {}, {}
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 ma = ca.manifest()
                 def setupctx(ctx):
                     """return a 'makectx' function suitable for checkcopies usage from ctx
                     We have to re-setup the function building 'filectx' for each
                     'checkcopies' to ensure the linkrev adjustement is properly setup for
                     each. Linkrev adjustment is important to avoid bug in rename
                     detection. Moreover, having a proper '_ancestrycontext' setup ensures
                     the performance impact of this adjustment is kept limited. Without it,
                     each file could do a full dag traversal making the time complexity of
                     the operation explode (see issue4537).
                     This function exists here mostly to limit the impact on stable. Feel
                     free to refactor on default.
                     """
                     rev = ctx.rev()
                     ac = getattr(ctx, '_ancestrycontext', None)
                     if ac is None:
                         revs = [rev]
                         if rev is None:
                             revs = [p.rev() for p in ctx.parents()]
                         ac = ctx._repo.changelog.ancestors(revs, inclusive=True)
                         ctx._ancestrycontext = ac
                     def makectx(f, n):
                         if len(n) != 20:  # in a working context?
                             if c1.rev() is None:
                                 return c1.filectx(f)
                             return c2.filectx(f)
                         fctx = repo.filectx(f, fileid=n)
                         # setup only needed for filectx not create from a changectx
                         fctx._ancestrycontext = ac
                         fctx._descendantrev = rev
                         return fctx
                     return util.lrucachefunc(makectx)
                 copy = {}
                 movewithdir = {}
                 fullcopy = {}
                 diverge = {}
                 repo.ui.debug("  searching for copies back to rev %d\n" % limit)
                 addedinm1 = m1.filesnotin(ma)
                 addedinm2 = m2.filesnotin(ma)
                 u1, u2 = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
                 for f in u1:
                     ctx = setupctx(c1)
                     checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)
                 for f in u2:
                     ctx = setupctx(c2)
                     checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)
                 renamedelete = {}
                 renamedelete2 = set()
                 diverge2 = set()
                 for of, fl in diverge.items():
                     if len(fl) == 1 or of in c1 or of in c2:
                         del diverge[of] # not actually divergent, or not a rename
                         if of not in c1 and of not in c2:
                             # renamed on one side, deleted on the other side, but filter
                             # out files that have been renamed and then deleted
                             renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                             renamedelete2.update(fl) # reverse map for below
                     else:
                         diverge2.update(fl) # reverse map for below
                 bothnew = sorted(addedinm1 & addedinm2)
                 if bothnew:
                     repo.ui.debug("  unmatched files new in both:\n   %s\n"
                                   % "\n   ".join(bothnew))
                 bothdiverge, _copy, _fullcopy = {}, {}, {}
                 for f in bothnew:
                     ctx = setupctx(c1)
                     checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
                     ctx = setupctx(c2)
                     checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
                 for of, fl in bothdiverge.items():
                     if len(fl) == 2 and fl[0] == fl[1]:
                         copy[fl[0]] = of # not actually divergent, just matching renames
                 if fullcopy and repo.ui.debugflag:
                     repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                                   "% = renamed and deleted):\n")
                     for f in sorted(fullcopy):
                         note = ""
                         if f in copy:
                             note += "*"
                         if f in diverge2:
                             note += "!"
                         if f in renamedelete2:
                             note += "%"
                         repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                                           note))
                 del diverge2
                 if not fullcopy:
                     return copy, movewithdir, diverge, renamedelete
                 repo.ui.debug("  checking for directory renames\n")
                 # generate a directory move map
                 d1, d2 = c1.dirs(), c2.dirs()
                 # Hack for adding '', which is not otherwise added, to d1 and d2
                 d1.addpath('/')
                 d2.addpath('/')
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in fullcopy.iteritems():
                     dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif dsrc in d1 and ddst in d1:
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc)
                     elif dsrc in d2 and ddst in d2:
                         # directory wasn't entirely moved remotely
                         invalid.add(dsrc)
                     elif dsrc in dirmove and dirmove[dsrc] != ddst:
                         # files from the same directory moved to two different places
                         invalid.add(dsrc)
                     else:
                         # looks good so far
                         dirmove[dsrc + "/"] = ddst + "/"
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del d1, d2, invalid
                 if not dirmove:
                     return copy, movewithdir, diverge, renamedelete
                 for d in dirmove:
                     repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                                   (d, dirmove[d]))
                 # check unaccounted nonoverlapping files against directory moves
                 for f in u1 + u2:
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d):]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(("   pending file src: '%s' -> "
                                                    "dst: '%s'\n") % (f, df))
                                 break
                 return copy, movewithdir, diverge, renamedelete
             def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
                 """
                 check possible copies of f from m1 to m2
                 ctx = function accepting (filename, node) that returns a filectx.
                 f = the filename to check
                 m1 = the source manifest
                 m2 = the destination manifest
                 ca = the changectx of the common ancestor
                 limit = the rev number to not search beyond
                 diverge = record all diverges in this dict
                 copy = record all non-divergent copies in this dict
                 fullcopy = record all copies in this dict
                 """
                 ma = ca.manifest()
                 def _related(f1, f2, limit):
                     # Walk back to common ancestor to see if the two files originate
                     # from the same file. Since workingfilectx's rev() is None it messes
                     # up the integer comparison logic, hence the pre-step check for
                     # None (f1 and f2 can only be workingfilectx's initially).
                     if f1 == f2:
                         return f1 # a match
                     g1, g2 = f1.ancestors(), f2.ancestors()
                     try:
                         f1r, f2r = f1.linkrev(), f2.linkrev()
                         if f1r is None:
                             f1 = g1.next()
                         if f2r is None:
                             f2 = g2.next()
                         while True:
                             f1r, f2r = f1.linkrev(), f2.linkrev()
                             if f1r > f2r:
                                 f1 = g1.next()
                             elif f2r > f1r:
                                 f2 = g2.next()
                             elif f1 == f2:
                                 return f1 # a match
                             elif f1r == f2r or f1r < limit or f2r < limit:
                                 return False # copy no longer relevant
                     except StopIteration:
                         return False
                 of = None
                 seen = set([f])
                 for oc in ctx(f, m1[f]).ancestors():
                     ocr = oc.linkrev()
                     of = oc.path()
                     if of in seen:
                         # check limit late - grab last rename before
                         if ocr < limit:
                             break
                         continue
                     seen.add(of)
                     fullcopy[f] = of # remember for dir rename detection
                     if of not in m2:
                         continue # no match, keep looking
                     if m2[of] == ma.get(of):
                         break # no merge needed, quit early
                     c2 = ctx(of, m2[of])
                     cr = _related(oc, c2, ca.rev())
                     if cr and (of == f or of == c2.path()): # non-divergent
                         copy[f] = of
                         of = None
                         break
                 if of in ma:
                     diverge.setdefault(of, []).append(f)
             def duplicatecopies(repo, rev, fromrev, skiprev=None):
                 '''reproduce copies from fromrev to rev in the dirstate
                 If skiprev is specified, it's a revision that should be used to
                 filter copy records. Any copies that occur between fromrev and
                 skiprev will not be duplicated, even if they appear in the set of
                 copies between fromrev and rev.
                 '''
                 exclude = {}
                 if skiprev is not None:
                     exclude = pathcopies(repo[fromrev], repo[skiprev])
                 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
                     # copies.pathcopies returns backward renames, so dst might not
                     # actually be in the dirstate
                     if dst in exclude:
                         continue
                     if repo.dirstate[dst] in "nma":
                         repo.dirstate.copy(src, dst)