upstream/mercurial-mirror Commit - r30184:7321c6b0

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import heapq

10

import heapq

11

12

from . import (

12

from . import (

13

node,

13

node,

14

pathutil,

14

pathutil,

15

scmutil,

15

scmutil,

16

util,

16

util,

17

)

17

)

18

19

def _findlimit(repo, a, b):

19

def _findlimit(repo, a, b):

20

"""

20

"""

21

Find the last revision that needs to be checked to ensure that a full

21

Find the last revision that needs to be checked to ensure that a full

22

transitive closure for file copies can be properly calculated.

22

transitive closure for file copies can be properly calculated.

23

Generally, this means finding the earliest revision number that's an

23

Generally, this means finding the earliest revision number that's an

24

ancestor of a or b but not both, except when a or b is a direct descendent

24

ancestor of a or b but not both, except when a or b is a direct descendent

25

of the other, in which case we can return the minimum revnum of a and b.

25

of the other, in which case we can return the minimum revnum of a and b.

26

None if no such revision exists.

26

None if no such revision exists.

27

"""

27

"""

28

29

# basic idea:

29

# basic idea:

30

# - mark a and b with different sides

30

# - mark a and b with different sides

31

# - if a parent's children are all on the same side, the parent is

31

# - if a parent's children are all on the same side, the parent is

32

# on that side, otherwise it is on no side

32

# on that side, otherwise it is on no side

33

# - walk the graph in topological order with the help of a heap;

33

# - walk the graph in topological order with the help of a heap;

34

# - add unseen parents to side map

34

# - add unseen parents to side map

35

# - clear side of any parent that has children on different sides

35

# - clear side of any parent that has children on different sides

36

# - track number of interesting revs that might still be on a side

36

# - track number of interesting revs that might still be on a side

37

# - track the lowest interesting rev seen

37

# - track the lowest interesting rev seen

38

# - quit when interesting revs is zero

38

# - quit when interesting revs is zero

39

40

cl = repo.changelog

40

cl = repo.changelog

41

working = len(cl) # pseudo rev for the working directory

41

working = len(cl) # pseudo rev for the working directory

42

if a is None:

42

if a is None:

43

a = working

43

a = working

44

if b is None:

44

if b is None:

45

b = working

45

b = working

46

47

side = {a: -1, b: 1}

47

side = {a: -1, b: 1}

48

visit = [-a, -b]

48

visit = [-a, -b]

49

heapq.heapify(visit)

49

heapq.heapify(visit)

50

interesting = len(visit)

50

interesting = len(visit)

51

hascommonancestor = False

51

hascommonancestor = False

52

limit = working

52

limit = working

53

54

while interesting:

54

while interesting:

55

r = -heapq.heappop(visit)

55

r = -heapq.heappop(visit)

56

if r == working:

56

if r == working:

57

parents = [cl.rev(p) for p in repo.dirstate.parents()]

57

parents = [cl.rev(p) for p in repo.dirstate.parents()]

58

else:

58

else:

59

parents = cl.parentrevs(r)

59

parents = cl.parentrevs(r)

60

for p in parents:

60

for p in parents:

61

if p < 0:

61

if p < 0:

62

continue

62

continue

63

if p not in side:

63

if p not in side:

64

# first time we see p; add it to visit

64

# first time we see p; add it to visit

65

side[p] = side[r]

65

side[p] = side[r]

66

if side[p]:

66

if side[p]:

67

interesting += 1

67

interesting += 1

68

heapq.heappush(visit, -p)

68

heapq.heappush(visit, -p)

69

elif side[p] and side[p] != side[r]:

69

elif side[p] and side[p] != side[r]:

70

# p was interesting but now we know better

70

# p was interesting but now we know better

71

side[p] = 0

71

side[p] = 0

72

interesting -= 1

72

interesting -= 1

73

hascommonancestor = True

73

hascommonancestor = True

74

if side[r]:

74

if side[r]:

75

limit = r # lowest rev visited

75

limit = r # lowest rev visited

76

interesting -= 1

76

interesting -= 1

77

78

if not hascommonancestor:

78

if not hascommonancestor:

79

return None

79

return None

80

81

# Consider the following flow (see test-commit-amend.t under issue4405):

81

# Consider the following flow (see test-commit-amend.t under issue4405):

82

# 1/ File 'a0' committed

82

# 1/ File 'a0' committed

83

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

83

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

84

# 3/ Move back to first commit

84

# 3/ Move back to first commit

85

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

85

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

86

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

86

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

87

#

87

#

88

# During the amend in step five, we will be in this state:

88

# During the amend in step five, we will be in this state:

89

#

89

#

90

# @ 3 temporary amend commit for a1-amend

90

# @ 3 temporary amend commit for a1-amend

91

# |

91

# |

92

# o 2 a1-amend

92

# o 2 a1-amend

93

# |

93

# |

94

# | o 1 a1

94

# | o 1 a1

95

# |/

95

# |/

96

# o 0 a0

96

# o 0 a0

97

#

97

#

98

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

98

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

99

# yet the filelog has the copy information in rev 1 and we will not look

99

# yet the filelog has the copy information in rev 1 and we will not look

100

# back far enough unless we also look at the a and b as candidates.

100

# back far enough unless we also look at the a and b as candidates.

101

# This only occurs when a is a descendent of b or visa-versa.

101

# This only occurs when a is a descendent of b or visa-versa.

102

return min(limit, a, b)

102

return min(limit, a, b)

103

104

def _chain(src, dst, a, b):

104

def _chain(src, dst, a, b):

105

'''chain two sets of copies a->b'''

105

'''chain two sets of copies a->b'''

106

t = a.copy()

106

t = a.copy()

107

for k, v in b.iteritems():

107

for k, v in b.iteritems():

108

if v in t:

108

if v in t:

109

# found a chain

109

# found a chain

110

if t[v] != k:

110

if t[v] != k:

111

# file wasn't renamed back to itself

111

# file wasn't renamed back to itself

112

t[k] = t[v]

112

t[k] = t[v]

113

if v not in dst:

113

if v not in dst:

114

# chain was a rename, not a copy

114

# chain was a rename, not a copy

115

del t[v]

115

del t[v]

116

if v in src:

116

if v in src:

117

# file is a copy of an existing file

117

# file is a copy of an existing file

118

t[k] = v

118

t[k] = v

119

120

# remove criss-crossed copies

120

# remove criss-crossed copies

121

for k, v in t.items():

121

for k, v in t.items():

122

if k in src and v in dst:

122

if k in src and v in dst:

123

del t[k]

123

del t[k]

124

125

return t

125

return t

126

127

def _tracefile(fctx, am, limit=-1):

127

def _tracefile(fctx, am, limit=-1):

128

'''return file context that is the ancestor of fctx present in ancestor

128

'''return file context that is the ancestor of fctx present in ancestor

129

manifest am, stopping after the first ancestor lower than limit'''

129

manifest am, stopping after the first ancestor lower than limit'''

130

131

for f in fctx.ancestors():

131

for f in fctx.ancestors():

132

if am.get(f.path(), None) == f.filenode():

132

if am.get(f.path(), None) == f.filenode():

133

return f

133

return f

134

if limit >= 0 and f.linkrev() < limit and f.rev() < limit:

134

if limit >= 0 and f.linkrev() < limit and f.rev() < limit:

135

return None

135

return None

136

137

def _dirstatecopies(d):

137

def _dirstatecopies(d):

138

ds = d._repo.dirstate

138

ds = d._repo.dirstate

139

c = ds.copies().copy()

139

c = ds.copies().copy()

140

for k in c.keys():

140

for k in c.keys():

141

if ds[k] not in 'anm':

141

if ds[k] not in 'anm':

142

del c[k]

142

del c[k]

143

return c

143

return c

144

145

def _computeforwardmissing(a, b, match=None):

145

def _computeforwardmissing(a, b, match=None):

146

"""Computes which files are in b but not a.

146

"""Computes which files are in b but not a.

147

This is its own function so extensions can easily wrap this call to see what

147

This is its own function so extensions can easily wrap this call to see what

148

files _forwardcopies is about to process.

148

files _forwardcopies is about to process.

149

"""

149

"""

150

ma = a.manifest()

150

ma = a.manifest()

151

mb = b.manifest()

151

mb = b.manifest()

152

if match:

152

if match:

153

ma = ma.matches(match)

153

ma = ma.matches(match)

154

mb = mb.matches(match)

154

mb = mb.matches(match)

155

return mb.filesnotin(ma)

155

return mb.filesnotin(ma)

156

157

def _forwardcopies(a, b, match=None):
    '''find {dst@b: src@a} copy mapping where a is an ancestor of b

    When b is the working copy (b.rev() is None), committed history is traced
    up to its first parent and the dirstate copy records are chained on top.
    '''

    # check for working copy
    w = None
    if b.rev() is None:
        w = b
        b = w.p1()
        if a == b:
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(w)

    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        limit = -1
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if not match and b.p1() == a and b.p2().node() == node.nullid:
        forwardmissingmatch = scmutil.matchfiles(a._repo, b.files())
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # one ancestry context is built up front and attached to every file
    # context traced below
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in missing:
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            cm[f] = ofctx.path()

    # combine copies from dirstate if necessary
    if w is not None:
        cm = _chain(a, w, cm, _dirstatecopies(w))

    return cm

205

206

def _backwardrenames(a, b):

206

def _backwardrenames(a, b):

207

if a._repo.ui.configbool('experimental', 'disablecopytrace'):

207

if a._repo.ui.configbool('experimental', 'disablecopytrace'):

208

return {}

208

return {}

209

210

# Even though we're not taking copies into account, 1:n rename situations

210

# Even though we're not taking copies into account, 1:n rename situations

211

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

211

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

212

# arbitrarily pick one of the renames.

212

# arbitrarily pick one of the renames.

213

f = _forwardcopies(b, a)

213

f = _forwardcopies(b, a)

214

r = {}

214

r = {}

215

for k, v in sorted(f.iteritems()):

215

for k, v in sorted(f.iteritems()):

216

# remove copies

216

# remove copies

217

if v in a:

217

if v in a:

218

continue

218

continue

219

r[v] = k

219

r[v] = k

220

return r

220

return r

221

222

def pathcopies(x, y, match=None):
    '''find {dst@y: src@x} copy mapping for directed compare

    Returns an empty dict when x and y are equal or either one is falsy.
    Otherwise the mapping is built from forward copies, backward renames, or
    a chain of both through the common ancestor of x and y. match is only
    forwarded to the forward-copy step.
    '''
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        # y descends from x: plain forward tracing
        return _forwardcopies(x, y, match=match)
    if a == y:
        # x descends from y: invert the renames
        return _backwardrenames(x, y)
    # otherwise go back from x to the ancestor, then forward to y
    return _chain(x, y, _backwardrenames(x, a),
                  _forwardcopies(a, y, match=match))

233

234

def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):

234

def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):

235

"""Computes, based on addedinm1 and addedinm2, the files exclusive to c1

235

"""Computes, based on addedinm1 and addedinm2, the files exclusive to c1

236

and c2. This is its own function so extensions can easily wrap this call

236

and c2. This is its own function so extensions can easily wrap this call

237

to see what files mergecopies is about to process.

237

to see what files mergecopies is about to process.

238

239

Even though c1 and c2 are not used in this function, they are useful in

239

Even though c1 and c2 are not used in this function, they are useful in

240

other extensions for being able to read the file nodes of the changed files.

240

other extensions for being able to read the file nodes of the changed files.

241

"""

241

"""

242

u1 = sorted(addedinm1 - addedinm2)

242

u1 = sorted(addedinm1 - addedinm2)

243

u2 = sorted(addedinm2 - addedinm1)

243

u2 = sorted(addedinm2 - addedinm1)

244

245

if u1:

245

if u1:

246

repo.ui.debug(" unmatched files in local:\n %s\n"

246

repo.ui.debug(" unmatched files in local:\n %s\n"

247

% "\n ".join(u1))

247

% "\n ".join(u1))

248

if u2:

248

if u2:

249

repo.ui.debug(" unmatched files in other:\n %s\n"

249

repo.ui.debug(" unmatched files in other:\n %s\n"

250

% "\n ".join(u2))

250

% "\n ".join(u2))

251

return u1, u2

251

return u1, u2

252

253

def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        revs = [rev]
        if rev is None:
            # working context: start from its parents instead
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        # stored on ctx so a later call with the same ctx finds it
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if len(n) != 20: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not created from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    return util.lrucachefunc(makectx)

287

288

def mergecopies(repo, c1, c2, ca):
    """
    Find moves and copies between context c1 and c2 that are relevant
    for merging.

    Returns four dicts: "copy", "movewithdir", "diverge", and
    "renamedelete".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.
    """
    # NOTE(review): this block was recovered from a rendering that appears to
    # collapse runs of whitespace; confirm the spacing inside the debug
    # strings against the repository copy.

    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if repo.ui.configbool('experimental', 'disablecopytrace'):
        return {}, {}, {}, {}

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    ma = ca.manifest()

    # see _checkcopies documentation below for these dicts
    diverge = {} # divergence data is shared
    data1 = {'copy': {},
             'fullcopy': {},
             'diverge': diverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'diverge': diverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(ma)
    addedinm2 = m2.filesnotin(ma)
    u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
    u1u, u2u = u1r, u2r
    bothnew = sorted(addedinm1 & addedinm2)

    for f in u1u:
        _checkcopies(c1, f, m1, m2, ca, limit, data1)

    for f in u2u:
        _checkcopies(c2, f, m2, m1, ca, limit, data2)

    # merge both sides; on a key clash the entry from side 2 wins
    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    # iterate over a snapshot: entries are deleted from diverge in the loop
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothdata = {'copy': {},
                'fullcopy': {},
                'diverge': bothdiverge,
               }
    for f in bothnew:
        _checkcopies(c1, f, m1, m2, ca, limit, bothdata)
        _checkcopies(c2, f, m2, m1, ca, limit, bothdata)
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.items():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        # invalid and dirmove are keyed by directory name plus a trailing "/"
        if dsrc + "/" in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc + "/")
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc + "/")
        elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
            # files from the same directory moved to two different places
            invalid.add(dsrc + "/")
        else:
            # looks good so far
            dirmove[dsrc + "/"] = ddst + "/"

    # a directory may have been recorded in dirmove before it turned out to
    # be invalid; drop those entries now
    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete

455

465

456

def _related(f1, f2, limit):

466

def _related(f1, f2, limit):

457

"""return True if f1 and f2 filectx have a common ancestor

467

"""return True if f1 and f2 filectx have a common ancestor

458

468

459

Walk back to common ancestor to see if the two files originate

469

Walk back to common ancestor to see if the two files originate

460

from the same file. Since workingfilectx's rev() is None it messes

470

from the same file. Since workingfilectx's rev() is None it messes

461

up the integer comparison logic, hence the pre-step check for

471

up the integer comparison logic, hence the pre-step check for

462

None (f1 and f2 can only be workingfilectx's initially).

472

None (f1 and f2 can only be workingfilectx's initially).

463

"""

473

"""

464

474

465

if f1 == f2:

475

if f1 == f2:

466

return f1 # a match

476

return f1 # a match

467

477

468

g1, g2 = f1.ancestors(), f2.ancestors()

478

g1, g2 = f1.ancestors(), f2.ancestors()

469

try:

479

try:

470

f1r, f2r = f1.linkrev(), f2.linkrev()

480

f1r, f2r = f1.linkrev(), f2.linkrev()

471

481

472

if f1r is None:

482

if f1r is None:

473

f1 = next(g1)

483

f1 = next(g1)

474

if f2r is None:

484

if f2r is None:

475

f2 = next(g2)

485

f2 = next(g2)

476

486

477

while True:

487

while True:

478

f1r, f2r = f1.linkrev(), f2.linkrev()

488

f1r, f2r = f1.linkrev(), f2.linkrev()

479

if f1r > f2r:

489

if f1r > f2r:

480

f1 = next(g1)

490

f1 = next(g1)

481

elif f2r > f1r:

491

elif f2r > f1r:

482

f2 = next(g2)

492

f2 = next(g2)

483

elif f1 == f2:

493

elif f1 == f2:

484

return f1 # a match

494

return f1 # a match

485

elif f1r == f2r or f1r < limit or f2r < limit:

495

elif f1r == f2r or f1r < limit or f2r < limit:

486

return False # copy no longer relevant

496

return False # copy no longer relevant

487

except StopIteration:

497

except StopIteration:

488

return False

498

return False

489

499

490

def _checkcopies(ctx, f, m1, m2, base, limit, diverge, copy, fullcopy):

500

def _checkcopies(ctx, f, m1, m2, base, limit, data):

491

"""

501

"""

492

check possible copies of f from m1 to m2

502

check possible copies of f from m1 to m2

493

503

494

ctx = starting context for f in m1

504

ctx = starting context for f in m1

495

f = the filename to check (as in m1)

505

f = the filename to check (as in m1)

496

m1 = the source manifest

506

m1 = the source manifest

497

m2 = the destination manifest

507

m2 = the destination manifest

498

base = the changectx used as a merge base

508

base = the changectx used as a merge base

499

limit = the rev number to not search beyond

509

limit = the rev number to not search beyond

500

diverge = record all diverges in this dict

510

data = dictionary of dictionary to store copy data. The keys are:

501

copy = record all non-divergent copies in this dict

511

- diverge = record all diverges in this dict

502

fullcopy = record all copies in this dict

512

- copy = record all non-divergent copies in this dict

513

- fullcopy = record all copies in this dict

503

514

504

note: limit is only an optimization, and there is no guarantee that

515

note: limit is only an optimization, and there is no guarantee that

505

irrelevant revisions will not be limited

516

irrelevant revisions will not be limited

506

there is no easy way to make this algorithm stop in a guaranteed way

517

there is no easy way to make this algorithm stop in a guaranteed way

507

once it "goes behind a certain revision".

518

once it "goes behind a certain revision".

508

"""

519

"""

509

520

510

mb = base.manifest()

521

mb = base.manifest()

511

getfctx = _makegetfctx(ctx)

522

getfctx = _makegetfctx(ctx)

512

523

513

of = None

524

of = None

514

seen = set([f])

525

seen = set([f])

515

for oc in getfctx(f, m1[f]).ancestors():

526

for oc in getfctx(f, m1[f]).ancestors():

516

ocr = oc.linkrev()

527

ocr = oc.linkrev()

517

of = oc.path()

528

of = oc.path()

518

if of in seen:

529

if of in seen:

519

# check limit late - grab last rename before

530

# check limit late - grab last rename before

520

if ocr < limit:

531

if ocr < limit:

521

break

532

break

522

continue

533

continue

523

seen.add(of)

534

seen.add(of)

524

535

525

fullcopy[f] = of # remember for dir rename detection

536

data['fullcopy'][f] = of # remember for dir rename detection

526

if of not in m2:

537

if of not in m2:

527

continue # no match, keep looking

538

continue # no match, keep looking

528

if m2[of] == mb.get(of):

539

if m2[of] == mb.get(of):

529

return # no merge needed, quit early

540

return # no merge needed, quit early

530

c2 = getfctx(of, m2[of])

541

c2 = getfctx(of, m2[of])

531

# c2 might be a plain new file added on destination side that is

542

# c2 might be a plain new file added on destination side that is

532

# unrelated to the droids we are looking for.

543

# unrelated to the droids we are looking for.

533

cr = _related(oc, c2, base.rev())

544

cr = _related(oc, c2, base.rev())

534

if cr and (of == f or of == c2.path()): # non-divergent

545

if cr and (of == f or of == c2.path()): # non-divergent

535

copy[f] = of

546

data['copy'][f] = of

536

return

547

return

537

548

538

if of in mb:

549

if of in mb:

539

diverge.setdefault(of, []).append(f)

550

data['diverge'].setdefault(of, []).append(f)

540

551

541

def duplicatecopies(repo, rev, fromrev, skiprev=None):

552

def duplicatecopies(repo, rev, fromrev, skiprev=None):

542

'''reproduce copies from fromrev to rev in the dirstate

553

'''reproduce copies from fromrev to rev in the dirstate

543

554

544

If skiprev is specified, it's a revision that should be used to

555

If skiprev is specified, it's a revision that should be used to

545

filter copy records. Any copies that occur between fromrev and

556

filter copy records. Any copies that occur between fromrev and

546

skiprev will not be duplicated, even if they appear in the set of

557

skiprev will not be duplicated, even if they appear in the set of

547

copies between fromrev and rev.

558

copies between fromrev and rev.

548

'''

559

'''

549

exclude = {}

560

exclude = {}

550

if (skiprev is not None and

561

if (skiprev is not None and

551

not repo.ui.configbool('experimental', 'disablecopytrace')):

562

not repo.ui.configbool('experimental', 'disablecopytrace')):

552

# disablecopytrace skips this line, but not the entire function because

563

# disablecopytrace skips this line, but not the entire function because

553

# the line below is O(size of the repo) during a rebase, while the rest

564

# the line below is O(size of the repo) during a rebase, while the rest

554

# of the function is much faster (and is required for carrying copy

565

# of the function is much faster (and is required for carrying copy

555

# metadata across the rebase anyway).

566

# metadata across the rebase anyway).

556

exclude = pathcopies(repo[fromrev], repo[skiprev])

567

exclude = pathcopies(repo[fromrev], repo[skiprev])

557

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

568

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

558

# copies.pathcopies returns backward renames, so dst might not

569

# copies.pathcopies returns backward renames, so dst might not

559

# actually be in the dirstate

570

# actually be in the dirstate

560

if dst in exclude:

571

if dst in exclude:

561

continue

572

continue

562

if repo.dirstate[dst] in "nma":

573

if repo.dirstate[dst] in "nma":

563

repo.dirstate.copy(src, dst)

574

repo.dirstate.copy(src, dst)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import heapq
             from . import (
                 node,
                 pathutil,
                 scmutil,
                 util,
             )
             def _findlimit(repo, a, b):
                 """
                 Find the last revision that needs to be checked to ensure that a full
                 transitive closure for file copies can be properly calculated.
                 Generally, this means finding the earliest revision number that's an
                 ancestor of a or b but not both, except when a or b is a direct descendent
                 of the other, in which case we can return the minimum revnum of a and b.
                 None if no such revision exists.
                 """
                 # basic idea:
                 # - mark a and b with different sides
                 # - if a parent's children are all on the same side, the parent is
                 #   on that side, otherwise it is on no side
                 # - walk the graph in topological order with the help of a heap;
                 #   - add unseen parents to side map
                 #   - clear side of any parent that has children on different sides
                 #   - track number of interesting revs that might still be on a side
                 #   - track the lowest interesting rev seen
                 #   - quit when interesting revs is zero
                 cl = repo.changelog
                 working = len(cl) # pseudo rev for the working directory
                 if a is None:
                     a = working
                 if b is None:
                     b = working
                 side = {a: -1, b: 1}
                 visit = [-a, -b]
                 heapq.heapify(visit)
                 interesting = len(visit)
                 hascommonancestor = False
                 limit = working
                 while interesting:
                     r = -heapq.heappop(visit)
                     if r == working:
                         parents = [cl.rev(p) for p in repo.dirstate.parents()]
                     else:
                         parents = cl.parentrevs(r)
                     for p in parents:
                         if p < 0:
                             continue
                         if p not in side:
                             # first time we see p; add it to visit
                             side[p] = side[r]
                             if side[p]:
                                 interesting += 1
                             heapq.heappush(visit, -p)
                         elif side[p] and side[p] != side[r]:
                             # p was interesting but now we know better
                             side[p] = 0
                             interesting -= 1
                             hascommonancestor = True
                     if side[r]:
                         limit = r # lowest rev visited
                         interesting -= 1
                 if not hascommonancestor:
                     return None
                 # Consider the following flow (see test-commit-amend.t under issue4405):
                 # 1/ File 'a0' committed
                 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
                 # 3/ Move back to first commit
                 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
                 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
                 #
                 # During the amend in step five, we will be in this state:
                 #
                 # @  3 temporary amend commit for a1-amend
                 # |
                 # o  2 a1-amend
                 # |
                 # | o  1 a1
                 # |/
                 # o  0 a0
                 #
                 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
                 # yet the filelog has the copy information in rev 1 and we will not look
                 # back far enough unless we also look at the a and b as candidates.
                 # This only occurs when a is a descendent of b or visa-versa.
                 return min(limit, a, b)
             def _chain(src, dst, a, b):
                 '''chain two sets of copies a->b'''
                 t = a.copy()
                 for k, v in b.iteritems():
                     if v in t:
                         # found a chain
                         if t[v] != k:
                             # file wasn't renamed back to itself
                             t[k] = t[v]
                         if v not in dst:
                             # chain was a rename, not a copy
                             del t[v]
                     if v in src:
                         # file is a copy of an existing file
                         t[k] = v
                 # remove criss-crossed copies
                 for k, v in t.items():
                     if k in src and v in dst:
                         del t[k]
                 return t
             def _tracefile(fctx, am, limit=-1):
                 '''return file context that is the ancestor of fctx present in ancestor
                 manifest am, stopping after the first ancestor lower than limit'''
                 for f in fctx.ancestors():
                     if am.get(f.path(), None) == f.filenode():
                         return f
                     if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
                         return None
             def _dirstatecopies(d):
                 ds = d._repo.dirstate
                 c = ds.copies().copy()
                 for k in c.keys():
                     if ds[k] not in 'anm':
                         del c[k]
                 return c
             def _computeforwardmissing(a, b, match=None):
                 """Computes which files are in b but not a.
                 This is its own function so extensions can easily wrap this call to see what
                 files _forwardcopies is about to process.
                 """
                 ma = a.manifest()
                 mb = b.manifest()
                 if match:
                     ma = ma.matches(match)
                     mb = mb.matches(match)
                 return mb.filesnotin(ma)
             def _forwardcopies(a, b, match=None):
                 '''find {dst@b: src@a} copy mapping where a is an ancestor of b

                 b may be a working copy context (b.rev() is None). In that case the
                 copies are traced up to its first parent and then chained with the
                 copies recorded in the dirstate.'''
                 wctx = None
                 if b.rev() is None:
                     # b is the working copy: do the tracing against its first parent
                     wctx, b = b, b.p1()
                     if a == b:
                         # short-circuit to avoid issues with merge states
                         return _dirstatecopies(wctx)

                 repo = a._repo

                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 limit = _findlimit(repo, a.rev(), b.rev())
                 if limit is None:
                     limit = -1
                 basemanifest = a.manifest()

                 # Only the origin of new files is looked up; finding where old files
                 # went would be too expensive. As a result a case like
                 # 'hg rm b; hg cp a b' can be missed.
                 #
                 # Computing the forward missing files compares the entire manifests,
                 # which is quite expensive on large ones. In the common case of a
                 # commit versus its parent (like during a rebase or histedit) the
                 # comparison is restricted to the files touched by the commit. Merge
                 # commits are excluded from this optimization, since the ctx.files()
                 # of a merge commit is not correct for this comparison.
                 if not match and b.p1() == a and b.p2().node() == node.nullid:
                     missingmatch = scmutil.matchfiles(repo, b.files())
                 else:
                     missingmatch = match
                 missing = _computeforwardmissing(a, b, match=missingmatch)

                 ancestry = repo.changelog.ancestors([b.rev()], inclusive=True)
                 copies = {}
                 for name in missing:
                     fctx = b[name]
                     fctx._ancestrycontext = ancestry
                     origin = _tracefile(fctx, basemanifest, limit)
                     if origin:
                         copies[name] = origin.path()

                 if wctx is None:
                     return copies
                 # combine with the copies recorded in the dirstate
                 return _chain(a, wctx, copies, _dirstatecopies(wctx))
             def _backwardrenames(a, b):
                 if a._repo.ui.configbool('experimental', 'disablecopytrace'):
                     return {}
                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
                 f = _forwardcopies(b, a)
                 r = {}
                 for k, v in sorted(f.iteritems()):
                     # remove copies
                     if v in a:
                         continue
                     r[v] = k
                 return r
             def pathcopies(x, y, match=None):
                 '''find {dst@y: src@x} copy mapping for directed compare

                 An empty dict is returned when x and y are equal or when either of
                 them is false. Otherwise the mapping is computed forwards, backwards,
                 or as a chain of both through the ancestor y.ancestor(x). match is
                 only passed on to the forward computation.'''
                 if x == y:
                     return {}
                 if not (x and y):
                     return {}

                 anc = y.ancestor(x)
                 if anc == x:
                     # x is an ancestor of y: plain forward tracing
                     return _forwardcopies(x, y, match=match)
                 if anc == y:
                     # y is an ancestor of x: look at renames backwards
                     return _backwardrenames(x, y)

                 # otherwise go back from x to the ancestor, then forward to y
                 backward = _backwardrenames(x, anc)
                 forward = _forwardcopies(anc, y, match=match)
                 return _chain(x, y, backward, forward)
             def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):
                 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
                 and c2. This is its own function so extensions can easily wrap this call
                 to see what files mergecopies is about to process.
                 Even though c1 and c2 are not used in this function, they are useful in
                 other extensions for being able to read the file nodes of the changed files.
                 """
                 u1 = sorted(addedinm1 - addedinm2)
                 u2 = sorted(addedinm2 - addedinm1)
                 if u1:
                     repo.ui.debug("  unmatched files in local:\n   %s\n"
                                   % "\n   ".join(u1))
                 if u2:
                     repo.ui.debug("  unmatched files in other:\n   %s\n"
                                   % "\n   ".join(u2))
                 return u1, u2
             def _makegetfctx(ctx):
                 """return a 'getfctx' function suitable for _checkcopies usage

                 The returned function takes a file name and a file node, builds the
                 matching file context and is wrapped in util.lrucachefunc.

                 The function building 'filectx' has to be set up again for each
                 '_checkcopies' so that the linkrev adjustment is properly set up for
                 each of them. Linkrev adjustment is important to avoid bugs in rename
                 detection. Moreover, having a proper '_ancestrycontext' keeps the
                 performance impact of this adjustment limited. Without it, each file
                 could do a full dag traversal, making the time complexity of the
                 operation explode (see issue4537).

                 This function exists here mostly to limit the impact on stable. Feel
                 free to refactor on default.
                 """
                 rev = ctx.rev()
                 repo = ctx._repo

                 # reuse the ancestry context cached on ctx, or compute and cache it
                 ancestry = getattr(ctx, '_ancestrycontext', None)
                 if ancestry is None:
                     if rev is None:
                         # no revision number: start from the parents instead
                         startrevs = [p.rev() for p in ctx.parents()]
                     else:
                         startrevs = [rev]
                     ancestry = repo.changelog.ancestors(startrevs, inclusive=True)
                     ctx._ancestrycontext = ancestry

                 def buildfctx(path, fnode):
                     if len(fnode) == 20:
                         fctx = repo.filectx(path, fileid=fnode)
                         # setup only needed for filectx not created from a changectx
                         fctx._ancestrycontext = ancestry
                         fctx._descendantrev = rev
                         return fctx
                     # node length other than 20: in a working context?
                     if ctx.rev() is None:
                         return ctx.filectx(path)
                     return repo[None][path]

                 return util.lrucachefunc(buildfctx)
             def mergecopies(repo, c1, c2, ca):
                 """
                 Find moves and copies between context c1 and c2 that are relevant
                 for merging.
                 Returns four dicts: "copy", "movewithdir", "diverge", and
                 "renamedelete".
                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.
                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.
                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.
                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return {}, {}, {}, {}
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return repo.dirstate.copies(), {}, {}, {}
                 # Copy trace disabling is explicitly below the node == p1 logic above
                 # because the logic above is required for a simple copy to be kept across a
                 # rebase.
                 if repo.ui.configbool('experimental', 'disablecopytrace'):
                     return {}, {}, {}, {}
                 limit = _findlimit(repo, c1.rev(), c2.rev())
                 if limit is None:
                     # no common ancestor, no copies
                     return {}, {}, {}, {}
                 repo.ui.debug("  searching for copies back to rev %d\n" % limit)
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 ma = ca.manifest()
                 # see _checkcopies documentation below for these dicts
-                copy1, copy2 = {}, {}
+                diverge = {} # divergence data is shared
-                fullcopy1, fullcopy2 = {}, {}
+                data1 = {'copy': {},
-                diverge = {}
+                         'fullcopy': {},
+                         'diverge': diverge,
+                        }
+                data2 = {'copy': {},
+                         'fullcopy': {},
+                         'diverge': diverge,
+                        }
                 # find interesting file sets from manifests
                 addedinm1 = m1.filesnotin(ma)
                 addedinm2 = m2.filesnotin(ma)
                 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
                 u1u, u2u = u1r, u2r
                 bothnew = sorted(addedinm1 & addedinm2)
                 for f in u1u:
-                    _checkcopies(c1, f, m1, m2, ca, limit, diverge, copy1, fullcopy1)
+                    _checkcopies(c1, f, m1, m2, ca, limit, data1)
                 for f in u2u:
-                    _checkcopies(c2, f, m2, m1, ca, limit, diverge, copy2, fullcopy2)
+                    _checkcopies(c2, f, m2, m1, ca, limit, data2)
-                copy = dict(copy1.items() + copy2.items())
+                copy = dict(data1['copy'].items() + data2['copy'].items())
-                fullcopy = dict(fullcopy1.items() + fullcopy2.items())
+                fullcopy = dict(data1['fullcopy'].items() + data2['fullcopy'].items())
                 renamedelete = {}
                 renamedeleteset = set()
                 divergeset = set()
                 for of, fl in diverge.items():
                     if len(fl) == 1 or of in c1 or of in c2:
                         del diverge[of] # not actually divergent, or not a rename
                         if of not in c1 and of not in c2:
                             # renamed on one side, deleted on the other side, but filter
                             # out files that have been renamed and then deleted
                             renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                             renamedeleteset.update(fl) # reverse map for below
                     else:
                         divergeset.update(fl) # reverse map for below
                 if bothnew:
                     repo.ui.debug("  unmatched files new in both:\n   %s\n"
                                   % "\n   ".join(bothnew))
-                bothdiverge, _copy, _fullcopy = {}, {}, {}
+                bothdiverge = {}
+                bothdata = {'copy': {},
+                            'fullcopy': {},
+                            'diverge': bothdiverge,
+                           }
                 for f in bothnew:
-                    _checkcopies(c1, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
+                    _checkcopies(c1, f, m1, m2, ca, limit, bothdata)
-                    _checkcopies(c2, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
+                    _checkcopies(c2, f, m2, m1, ca, limit, bothdata)
                 for of, fl in bothdiverge.items():
                     if len(fl) == 2 and fl[0] == fl[1]:
                         copy[fl[0]] = of # not actually divergent, just matching renames
                 if fullcopy and repo.ui.debugflag:
                     repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                                   "% = renamed and deleted):\n")
                     for f in sorted(fullcopy):
                         note = ""
                         if f in copy:
                             note += "*"
                         if f in divergeset:
                             note += "!"
                         if f in renamedeleteset:
                             note += "%"
                         repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                                           note))
                 del divergeset
                 if not fullcopy:
                     return copy, {}, diverge, renamedelete
                 repo.ui.debug("  checking for directory renames\n")
                 # generate a directory move map
                 d1, d2 = c1.dirs(), c2.dirs()
                 # Hack for adding '', which is not otherwise added, to d1 and d2
                 d1.addpath('/')
                 d2.addpath('/')
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in fullcopy.iteritems():
                     dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif dsrc in d1 and ddst in d1:
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc + "/")
                     elif dsrc in d2 and ddst in d2:
                         # directory wasn't entirely moved remotely
                         invalid.add(dsrc + "/")
                     elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
                         # files from the same directory moved to two different places
                         invalid.add(dsrc + "/")
                     else:
                         # looks good so far
                         dirmove[dsrc + "/"] = ddst + "/"
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del d1, d2, invalid
                 if not dirmove:
                     return copy, {}, diverge, renamedelete
                 for d in dirmove:
                     repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                                   (d, dirmove[d]))
                 movewithdir = {}
                 # check unaccounted nonoverlapping files against directory moves
                 for f in u1r + u2r:
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d):]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(("   pending file src: '%s' -> "
                                                    "dst: '%s'\n") % (f, df))
                                 break
                 return copy, movewithdir, diverge, renamedelete
             def _related(f1, f2, limit):
                 """return True if f1 and f2 filectx have a common ancestor
                 Walk back to common ancestor to see if the two files originate
                 from the same file. Since workingfilectx's rev() is None it messes
                 up the integer comparison logic, hence the pre-step check for
                 None (f1 and f2 can only be workingfilectx's initially).
                 """
                 if f1 == f2:
                     return f1 # a match
                 g1, g2 = f1.ancestors(), f2.ancestors()
                 try:
                     f1r, f2r = f1.linkrev(), f2.linkrev()
                     if f1r is None:
                         f1 = next(g1)
                     if f2r is None:
                         f2 = next(g2)
                     while True:
                         f1r, f2r = f1.linkrev(), f2.linkrev()
                         if f1r > f2r:
                             f1 = next(g1)
                         elif f2r > f1r:
                             f2 = next(g2)
                         elif f1 == f2:
                             return f1 # a match
                         elif f1r == f2r or f1r < limit or f2r < limit:
                             return False # copy no longer relevant
                 except StopIteration:
                     return False
-            def _checkcopies(ctx, f, m1, m2, base, limit, diverge, copy, fullcopy):
+            def _checkcopies(ctx, f, m1, m2, base, limit, data):
                 """
                 check possible copies of f from m1 to m2
                 ctx = starting context for f in m1
                 f = the filename to check (as in m1)
                 m1 = the source manifest
                 m2 = the destination manifest
                 base = the changectx used as a merge base
                 limit = the rev number to not search beyond
-                diverge = record all diverges in this dict
+                data = dictionary of dictionary to store copy data. The keys are:
-                copy = record all non-divergent copies in this dict
+                - diverge = record all diverges in this dict
-                fullcopy = record all copies in this dict
+                - copy = record all non-divergent copies in this dict
+                - fullcopy = record all copies in this dict
                 note: limit is only an optimization, and there is no guarantee that
                 irrelevant revisions will not be limited
                 there is no easy way to make this algorithm stop in a guaranteed way
                 once it "goes behind a certain revision".
                 nothing is returned: results are only recorded in the dictionaries
                 passed in by the caller.
                 """
                 # manifest of the merge base; compared against m2 below to spot
                 # files that did not change on the destination side
                 mb = base.manifest()
                 getfctx = _makegetfctx(ctx)
                 # 'of' holds the path of the last ancestor visited; it stays None
                 # if the loop below never runs
                 of = None
                 # paths already encountered while walking the ancestry of f
                 seen = set([f])
                 # walk the file-level ancestors of f, starting from its node in m1
                 for oc in getfctx(f, m1[f]).ancestors():
                     ocr = oc.linkrev()
                     of = oc.path()
                     if of in seen:
                         # check limit late - grab last rename before
                         if ocr < limit:
                             break
                         continue
                     seen.add(of)
-                    fullcopy[f] = of # remember for dir rename detection
+                    data['fullcopy'][f] = of # remember for dir rename detection
                     if of not in m2:
                         continue # no match, keep looking
                     if m2[of] == mb.get(of):
                         return # no merge needed, quit early
                     c2 = getfctx(of, m2[of])
                     # c2 might be a plain new file added on the destination side
                     # that is unrelated to the droids we are looking for.
                     cr = _related(oc, c2, base.rev())
                     if cr and (of == f or of == c2.path()): # non-divergent
-                        copy[f] = of
+                        data['copy'][f] = of
                         return
                 # reached only when no non-divergent copy was recorded above: if
                 # the last path visited exists in the merge base, record f as one
                 # of the names that path diverged into
                 if of in mb:
-                    diverge.setdefault(of, []).append(f)
+                    data['diverge'].setdefault(of, []).append(f)
             def duplicatecopies(repo, rev, fromrev, skiprev=None):
                 '''replay in the dirstate the copies made between fromrev and rev

                 When skiprev is given, it names a revision used to filter the copy
                 records: a copy whose destination already shows up between fromrev
                 and skiprev is not replayed, even though it is also part of the
                 copies found between fromrev and rev.
                 '''
                 skipped = {}
                 if skiprev is not None:
                     # Only this lookup is bypassed when copy tracing is disabled:
                     # it is O(size of the repo) during a rebase. The remainder of
                     # the function is much cheaper and is still needed to carry
                     # copy metadata across the rebase.
                     if not repo.ui.configbool('experimental', 'disablecopytrace'):
                         skipped = pathcopies(repo[fromrev], repo[skiprev])
                 copied = pathcopies(repo[fromrev], repo[rev])
                 for target, origin in copied.iteritems():
                     # pathcopies also reports backward renames, so the target may
                     # not actually be present in the dirstate
                     if target not in skipped and repo.dirstate[target] in "nma":
                         repo.dirstate.copy(origin, target)