upstream/mercurial-mirror Commit - r30186:f7ed5af3

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import heapq

10

import heapq

11

12

from . import (

12

from . import (

13

node,

13

node,

14

pathutil,

14

pathutil,

15

scmutil,

15

scmutil,

16

util,

16

util,

17

)

17

)

18

19

def _findlimit(repo, a, b):

19

def _findlimit(repo, a, b):

20

"""

20

"""

21

Find the last revision that needs to be checked to ensure that a full

21

Find the last revision that needs to be checked to ensure that a full

22

transitive closure for file copies can be properly calculated.

22

transitive closure for file copies can be properly calculated.

23

Generally, this means finding the earliest revision number that's an

23

Generally, this means finding the earliest revision number that's an

24

ancestor of a or b but not both, except when a or b is a direct descendent

24

ancestor of a or b but not both, except when a or b is a direct descendent

25

of the other, in which case we can return the minimum revnum of a and b.

25

of the other, in which case we can return the minimum revnum of a and b.

26

None if no such revision exists.

26

None if no such revision exists.

27

"""

27

"""

28

29

# basic idea:

29

# basic idea:

30

# - mark a and b with different sides

30

# - mark a and b with different sides

31

# - if a parent's children are all on the same side, the parent is

31

# - if a parent's children are all on the same side, the parent is

32

# on that side, otherwise it is on no side

32

# on that side, otherwise it is on no side

33

# - walk the graph in topological order with the help of a heap;

33

# - walk the graph in topological order with the help of a heap;

34

# - add unseen parents to side map

34

# - add unseen parents to side map

35

# - clear side of any parent that has children on different sides

35

# - clear side of any parent that has children on different sides

36

# - track number of interesting revs that might still be on a side

36

# - track number of interesting revs that might still be on a side

37

# - track the lowest interesting rev seen

37

# - track the lowest interesting rev seen

38

# - quit when interesting revs is zero

38

# - quit when interesting revs is zero

39

40

cl = repo.changelog

40

cl = repo.changelog

41

working = len(cl) # pseudo rev for the working directory

41

working = len(cl) # pseudo rev for the working directory

42

if a is None:

42

if a is None:

43

a = working

43

a = working

44

if b is None:

44

if b is None:

45

b = working

45

b = working

46

47

side = {a: -1, b: 1}

47

side = {a: -1, b: 1}

48

visit = [-a, -b]

48

visit = [-a, -b]

49

heapq.heapify(visit)

49

heapq.heapify(visit)

50

interesting = len(visit)

50

interesting = len(visit)

51

hascommonancestor = False

51

hascommonancestor = False

52

limit = working

52

limit = working

53

54

while interesting:

54

while interesting:

55

r = -heapq.heappop(visit)

55

r = -heapq.heappop(visit)

56

if r == working:

56

if r == working:

57

parents = [cl.rev(p) for p in repo.dirstate.parents()]

57

parents = [cl.rev(p) for p in repo.dirstate.parents()]

58

else:

58

else:

59

parents = cl.parentrevs(r)

59

parents = cl.parentrevs(r)

60

for p in parents:

60

for p in parents:

61

if p < 0:

61

if p < 0:

62

continue

62

continue

63

if p not in side:

63

if p not in side:

64

# first time we see p; add it to visit

64

# first time we see p; add it to visit

65

side[p] = side[r]

65

side[p] = side[r]

66

if side[p]:

66

if side[p]:

67

interesting += 1

67

interesting += 1

68

heapq.heappush(visit, -p)

68

heapq.heappush(visit, -p)

69

elif side[p] and side[p] != side[r]:

69

elif side[p] and side[p] != side[r]:

70

# p was interesting but now we know better

70

# p was interesting but now we know better

71

side[p] = 0

71

side[p] = 0

72

interesting -= 1

72

interesting -= 1

73

hascommonancestor = True

73

hascommonancestor = True

74

if side[r]:

74

if side[r]:

75

limit = r # lowest rev visited

75

limit = r # lowest rev visited

76

interesting -= 1

76

interesting -= 1

77

78

if not hascommonancestor:

78

if not hascommonancestor:

79

return None

79

return None

80

81

# Consider the following flow (see test-commit-amend.t under issue4405):

81

# Consider the following flow (see test-commit-amend.t under issue4405):

82

# 1/ File 'a0' committed

82

# 1/ File 'a0' committed

83

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

83

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

84

# 3/ Move back to first commit

84

# 3/ Move back to first commit

85

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

85

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

86

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

86

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

87

#

87

#

88

# During the amend in step five, we will be in this state:

88

# During the amend in step five, we will be in this state:

89

#

89

#

90

# @ 3 temporary amend commit for a1-amend

90

# @ 3 temporary amend commit for a1-amend

91

# |

91

# |

92

# o 2 a1-amend

92

# o 2 a1-amend

93

# |

93

# |

94

# | o 1 a1

94

# | o 1 a1

95

# |/

95

# |/

96

# o 0 a0

96

# o 0 a0

97

#

97

#

98

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

98

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

99

# yet the filelog has the copy information in rev 1 and we will not look

99

# yet the filelog has the copy information in rev 1 and we will not look

100

# back far enough unless we also look at the a and b as candidates.

100

# back far enough unless we also look at the a and b as candidates.

101

# This only occurs when a is a descendent of b or visa-versa.

101

# This only occurs when a is a descendent of b or visa-versa.

102

return min(limit, a, b)

102

return min(limit, a, b)

103

104

def _chain(src, dst, a, b):

104

def _chain(src, dst, a, b):

105

'''chain two sets of copies a->b'''

105

'''chain two sets of copies a->b'''

106

t = a.copy()

106

t = a.copy()

107

for k, v in b.iteritems():

107

for k, v in b.iteritems():

108

if v in t:

108

if v in t:

109

# found a chain

109

# found a chain

110

if t[v] != k:

110

if t[v] != k:

111

# file wasn't renamed back to itself

111

# file wasn't renamed back to itself

112

t[k] = t[v]

112

t[k] = t[v]

113

if v not in dst:

113

if v not in dst:

114

# chain was a rename, not a copy

114

# chain was a rename, not a copy

115

del t[v]

115

del t[v]

116

if v in src:

116

if v in src:

117

# file is a copy of an existing file

117

# file is a copy of an existing file

118

t[k] = v

118

t[k] = v

119

120

# remove criss-crossed copies

120

# remove criss-crossed copies

121

for k, v in t.items():

121

for k, v in t.items():

122

if k in src and v in dst:

122

if k in src and v in dst:

123

del t[k]

123

del t[k]

124

125

return t

125

return t

126

127

def _tracefile(fctx, am, limit=-1):

127

def _tracefile(fctx, am, limit=-1):

128

'''return file context that is the ancestor of fctx present in ancestor

128

'''return file context that is the ancestor of fctx present in ancestor

129

manifest am, stopping after the first ancestor lower than limit'''

129

manifest am, stopping after the first ancestor lower than limit'''

130

131

for f in fctx.ancestors():

131

for f in fctx.ancestors():

132

if am.get(f.path(), None) == f.filenode():

132

if am.get(f.path(), None) == f.filenode():

133

return f

133

return f

134

if limit >= 0 and f.linkrev() < limit and f.rev() < limit:

134

if limit >= 0 and f.linkrev() < limit and f.rev() < limit:

135

return None

135

return None

136

137

def _dirstatecopies(d):

137

def _dirstatecopies(d):

138

ds = d._repo.dirstate

138

ds = d._repo.dirstate

139

c = ds.copies().copy()

139

c = ds.copies().copy()

140

for k in c.keys():

140

for k in c.keys():

141

if ds[k] not in 'anm':

141

if ds[k] not in 'anm':

142

del c[k]

142

del c[k]

143

return c

143

return c

144

145

def _computeforwardmissing(a, b, match=None):

145

def _computeforwardmissing(a, b, match=None):

146

"""Computes which files are in b but not a.

146

"""Computes which files are in b but not a.

147

This is its own function so extensions can easily wrap this call to see what

147

This is its own function so extensions can easily wrap this call to see what

148

files _forwardcopies is about to process.

148

files _forwardcopies is about to process.

149

"""

149

"""

150

ma = a.manifest()

150

ma = a.manifest()

151

mb = b.manifest()

151

mb = b.manifest()

152

if match:

152

if match:

153

ma = ma.matches(match)

153

ma = ma.matches(match)

154

mb = mb.matches(match)

154

mb = mb.matches(match)

155

return mb.filesnotin(ma)

155

return mb.filesnotin(ma)

156

157

def _forwardcopies(a, b, match=None):
    '''find {dst@b: src@a} copy mapping where a is an ancestor of b

    When b is the working directory (b.rev() is None) the committed part is
    traced from its first parent and the result is then chained with the
    dirstate copies.
    '''

    # check for working copy
    w = None
    if b.rev() is None:
        w = b
        b = w.p1()
        if a == b:
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(w)

    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        limit = -1
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if not match and b.p1() == a and b.p2().node() == node.nullid:
        forwardmissingmatch = scmutil.matchfiles(a._repo, b.files())
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # one ancestry context is shared by every file context traced below
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in missing:
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            cm[f] = ofctx.path()

    # combine copies from dirstate if necessary
    if w is not None:
        cm = _chain(a, w, cm, _dirstatecopies(w))

    return cm


def _backwardrenames(a, b):

206

def _backwardrenames(a, b):

207

if a._repo.ui.configbool('experimental', 'disablecopytrace'):

207

if a._repo.ui.configbool('experimental', 'disablecopytrace'):

208

return {}

208

return {}

209

210

# Even though we're not taking copies into account, 1:n rename situations

210

# Even though we're not taking copies into account, 1:n rename situations

211

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

211

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

212

# arbitrarily pick one of the renames.

212

# arbitrarily pick one of the renames.

213

f = _forwardcopies(b, a)

213

f = _forwardcopies(b, a)

214

r = {}

214

r = {}

215

for k, v in sorted(f.iteritems()):

215

for k, v in sorted(f.iteritems()):

216

# remove copies

216

# remove copies

217

if v in a:

217

if v in a:

218

continue

218

continue

219

r[v] = k

219

r[v] = k

220

return r

220

return r

221

222

def pathcopies(x, y, match=None):
    '''find {dst@y: src@x} copy mapping for directed compare

    Returns an empty dict when x and y are equal or either one is falsy.
    match, when given, is only passed on to the forward tracing step.
    '''
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: plain forward tracing
        return _forwardcopies(x, y, match=match)
    if a == y:
        # y is an ancestor of x: walk the renames backward
        return _backwardrenames(x, y)
    # otherwise go back from x to the common ancestor, then forward to y
    return _chain(x, y, _backwardrenames(x, a),
                  _forwardcopies(a, y, match=match))


def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):

234

def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):

235

"""Computes, based on addedinm1 and addedinm2, the files exclusive to c1

235

"""Computes, based on addedinm1 and addedinm2, the files exclusive to c1

236

and c2. This is its own function so extensions can easily wrap this call

236

and c2. This is its own function so extensions can easily wrap this call

237

to see what files mergecopies is about to process.

237

to see what files mergecopies is about to process.

238

239

Even though c1 and c2 are not used in this function, they are useful in

239

Even though c1 and c2 are not used in this function, they are useful in

240

other extensions for being able to read the file nodes of the changed files.

240

other extensions for being able to read the file nodes of the changed files.

241

"""

241

"""

242

u1 = sorted(addedinm1 - addedinm2)

242

u1 = sorted(addedinm1 - addedinm2)

243

u2 = sorted(addedinm2 - addedinm1)

243

u2 = sorted(addedinm2 - addedinm1)

244

245

if u1:

245

if u1:

246

repo.ui.debug(" unmatched files in local:\n %s\n"

246

repo.ui.debug(" unmatched files in local:\n %s\n"

247

% "\n ".join(u1))

247

% "\n ".join(u1))

248

if u2:

248

if u2:

249

repo.ui.debug(" unmatched files in other:\n %s\n"

249

repo.ui.debug(" unmatched files in other:\n %s\n"

250

% "\n ".join(u2))

250

% "\n ".join(u2))

251

return u1, u2

251

return u1, u2

252

253

def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bugs in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.

    The returned function takes (f, n), a file name and a file node, and
    is wrapped in util.lrucachefunc.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        revs = [rev]
        if rev is None:
            # working directory: start from its parents instead
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        # store it on ctx so a later call can reuse it
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if len(n) != 20: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not created from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    return util.lrucachefunc(makectx)


def mergecopies(repo, c1, c2, base):
    """
    Find moves and copies between context c1 and c2 that are relevant
    for merging. 'base' will be used as the merge base.

    Returns four dicts: "copy", "movewithdir", "diverge", and
    "renamedelete".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.
    """
    # NOTE(review): the whitespace inside the debug strings below is
    # reproduced as seen; confirm it against the expected debug output.

    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if repo.ui.configbool('experimental', 'disablecopytrace'):
        return {}, {}, {}, {}

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    diverge = {} # divergence data is shared
    data1 = {'copy': {},
             'fullcopy': {},
             'diverge': diverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'diverge': diverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb)
    addedinm2 = m2.filesnotin(mb)
    u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
    u1u, u2u = u1r, u2r
    bothnew = sorted(addedinm1 & addedinm2)

    for f in u1u:
        _checkcopies(c1, f, m1, m2, base, limit, data1)

    for f in u2u:
        _checkcopies(c2, f, m2, m1, base, limit, data2)

    # merge the per-side results; entries from side 2 win on a key clash
    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    # iterate over a snapshot because entries are deleted from diverge below
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothdata = {'copy': {},
                'fullcopy': {},
                'diverge': bothdiverge,
               }
    for f in bothnew:
        _checkcopies(c1, f, m1, m2, base, limit, bothdata)
        _checkcopies(c2, f, m2, m1, base, limit, bothdata)
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.items():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        # 'invalid' and 'dirmove' are keyed with a trailing slash, so the
        # lookup has to use the same form or the short-circuit never fires
        if dsrc + "/" in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc + "/")
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc + "/")
        elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
            # files from the same directory moved to two different places
            invalid.add(dsrc + "/")
        else:
            # looks good so far
            dirmove[dsrc + "/"] = ddst + "/"

    # drop candidates that were recorded before being found invalid
    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete


def _related(f1, f2, limit):

469

def _related(f1, f2, limit):

470

"""return True if f1 and f2 filectx have a common ancestor

470

"""return True if f1 and f2 filectx have a common ancestor

471

472

Walk back to common ancestor to see if the two files originate

472

Walk back to common ancestor to see if the two files originate

473

from the same file. Since workingfilectx's rev() is None it messes

473

from the same file. Since workingfilectx's rev() is None it messes

474

up the integer comparison logic, hence the pre-step check for

474

up the integer comparison logic, hence the pre-step check for

475

None (f1 and f2 can only be workingfilectx's initially).

475

None (f1 and f2 can only be workingfilectx's initially).

476

"""

476

"""

477

478

if f1 == f2:

478

if f1 == f2:

479

return f1 # a match

479

return f1 # a match

480

481

g1, g2 = f1.ancestors(), f2.ancestors()

481

g1, g2 = f1.ancestors(), f2.ancestors()

482

try:

482

try:

483

f1r, f2r = f1.linkrev(), f2.linkrev()

483

f1r, f2r = f1.linkrev(), f2.linkrev()

484

485

if f1r is None:

485

if f1r is None:

486

f1 = next(g1)

486

f1 = next(g1)

487

if f2r is None:

487

if f2r is None:

488

f2 = next(g2)

488

f2 = next(g2)

489

490

while True:

490

while True:

491

f1r, f2r = f1.linkrev(), f2.linkrev()

491

f1r, f2r = f1.linkrev(), f2.linkrev()

492

if f1r > f2r:

492

if f1r > f2r:

493

f1 = next(g1)

493

f1 = next(g1)

494

elif f2r > f1r:

494

elif f2r > f1r:

495

f2 = next(g2)

495

f2 = next(g2)

496

elif f1 == f2:

496

elif f1 == f2:

497

return f1 # a match

497

return f1 # a match

498

elif f1r == f2r or f1r < limit or f2r < limit:

498

elif f1r == f2r or f1r < limit or f2r < limit:

499

return False # copy no longer relevant

499

return False # copy no longer relevant

500

except StopIteration:

500

except StopIteration:

501

return False

501

return False

502

503

def _checkcopies(ctx, f, m1, m2, base, limit, data):

503

def _checkcopies(ctx, f, m1, m2, base, limit, data):

504

"""

504

"""

505

check possible copies of f from m1 to m2

505

check possible copies of f from m1 to m2

506

507

ctx = starting context for f in m1

507

ctx = starting context for f in m1

508

f = the filename to check (as in m1)

508

f = the filename to check (as in m1)

509

m1 = the source manifest

509

m1 = the source manifest

510

m2 = the destination manifest

510

m2 = the destination manifest

511

base = the changectx used as a merge base

511

base = the changectx used as a merge base

512

limit = the rev number to not search beyond

512

limit = the rev number to not search beyond

513

data = dictionary of dictionary to store copy data. (see mergecopies)

513

data = dictionary of dictionary to store copy data. (see mergecopies)

514

515

note: limit is only an optimization, and there is no guarantee that

515

note: limit is only an optimization, and there is no guarantee that

516

irrelevant revisions will not be limited

516

irrelevant revisions will not be limited

517

there is no easy way to make this algorithm stop in a guaranteed way

517

there is no easy way to make this algorithm stop in a guaranteed way

518

once it "goes behind a certain revision".

518

once it "goes behind a certain revision".

519

"""

519

"""

520

521

mb = base.manifest()

521

mb = base.manifest()

522

getfctx = _makegetfctx(ctx)

522

getfctx = _makegetfctx(ctx)

523

524

of = None

524

of = None

525

seen = set([f])

525

seen = set([f])

526

for oc in getfctx(f, m1[f]).ancestors():

526

for oc in getfctx(f, m1[f]).ancestors():

527

ocr = oc.linkrev()

527

ocr = oc.linkrev()

528

of = oc.path()

528

of = oc.path()

529

if of in seen:

529

if of in seen:

530

# check limit late - grab last rename before

530

# check limit late - grab last rename before

531

if ocr < limit:

531

if ocr < limit:

532

break

532

break

533

continue

533

continue

534

seen.add(of)

534

seen.add(of)

535

536

data['fullcopy'][f] = of # remember for dir rename detection

536

data['fullcopy'][f] = of # remember for dir rename detection

537

if of not in m2:

537

if of not in m2:

538

continue # no match, keep looking

538

continue # no match, keep looking

539

if m2[of] == mb.get(of):

539

if m2[of] == mb.get(of):

540

return # no merge needed, quit early

540

return # no merge needed, quit early

541

c2 = getfctx(of, m2[of])

541

c2 = getfctx(of, m2[of])

542

# c2 might be a plain new file on added on destination side that is

542

# c2 might be a plain new file on added on destination side that is

543

# unrelated to the droids we are looking for.

543

# unrelated to the droids we are looking for.

544

cr = _related(oc, c2, base.rev())

544

cr = _related(oc, c2, base.rev())

545

if cr and (of == f or of == c2.path()): # non-divergent

545

if cr and (of == f or of == c2.path()): # non-divergent

546

data['copy'][f] = of

546

data['copy'][f] = of

547

return

547

return

548

549

if of in mb:

549

if of in mb:

550

data['diverge'].setdefault(of, []).append(f)

550

data['diverge'].setdefault(of, []).append(f)

551

552

def duplicatecopies(repo, rev, fromrev, skiprev=None):

552

def duplicatecopies(repo, rev, fromrev, skiprev=None):

553

'''reproduce copies from fromrev to rev in the dirstate

553

'''reproduce copies from fromrev to rev in the dirstate

554

555

If skiprev is specified, it's a revision that should be used to

555

If skiprev is specified, it's a revision that should be used to

556

filter copy records. Any copies that occur between fromrev and

556

filter copy records. Any copies that occur between fromrev and

557

skiprev will not be duplicated, even if they appear in the set of

557

skiprev will not be duplicated, even if they appear in the set of

558

copies between fromrev and rev.

558

copies between fromrev and rev.

559

'''

559

'''

560

exclude = {}

560

exclude = {}

561

if (skiprev is not None and

561

if (skiprev is not None and

562

not repo.ui.configbool('experimental', 'disablecopytrace')):

562

not repo.ui.configbool('experimental', 'disablecopytrace')):

563

# disablecopytrace skips this line, but not the entire function because

563

# disablecopytrace skips this line, but not the entire function because

564

# the line below is O(size of the repo) during a rebase, while the rest

564

# the line below is O(size of the repo) during a rebase, while the rest

565

# of the function is much faster (and is required for carrying copy

565

# of the function is much faster (and is required for carrying copy

566

# metadata across the rebase anyway).

566

# metadata across the rebase anyway).

567

exclude = pathcopies(repo[fromrev], repo[skiprev])

567

exclude = pathcopies(repo[fromrev], repo[skiprev])

568

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

568

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

569

# copies.pathcopies returns backward renames, so dst might not

569

# copies.pathcopies returns backward renames, so dst might not

570

# actually be in the dirstate

570

# actually be in the dirstate

571

if dst in exclude:

571

if dst in exclude:

572

continue

572

continue

573

if repo.dirstate[dst] in "nma":

573

if repo.dirstate[dst] in "nma":

574

repo.dirstate.copy(src, dst)

574

repo.dirstate.copy(src, dst)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import heapq
             from . import (
                 node,
                 pathutil,
                 scmutil,
                 util,
             )
             def _findlimit(repo, a, b):
                 """
                 Find the last revision that needs to be checked to ensure that a full
                 transitive closure for file copies can be properly calculated.
                 Generally, this means finding the earliest revision number that's an
                 ancestor of a or b but not both, except when a or b is a direct
                 descendant of the other, in which case we can return the minimum revnum
                 of a and b.

                 a and b are revision numbers; None stands for the working directory and
                 is replaced by the pseudo revision len(repo.changelog).

                 Returns None if the walk never finds a revision reachable from both
                 a and b (no common ancestor).
                 """
                 # basic idea:
                 # - mark a and b with different sides
                 # - if a parent's children are all on the same side, the parent is
                 #   on that side, otherwise it is on no side
                 # - walk the graph in topological order with the help of a heap;
                 #   - add unseen parents to side map
                 #   - clear side of any parent that has children on different sides
                 #   - track number of interesting revs that might still be on a side
                 #   - track the lowest interesting rev seen
                 #   - quit when interesting revs is zero
                 cl = repo.changelog
                 working = len(cl) # pseudo rev for the working directory
                 if a is None:
                     a = working
                 if b is None:
                     b = working
                 # side values: -1 = only reachable from a, 1 = only reachable from b,
                 # 0 = reachable from both
                 side = {a: -1, b: 1}
                 # revisions are stored negated so that heapq (a min-heap) pops the
                 # highest revision number first
                 visit = [-a, -b]
                 heapq.heapify(visit)
                 interesting = len(visit)
                 hascommonancestor = False
                 limit = working
                 while interesting:
                     r = -heapq.heappop(visit)
                     if r == working:
                         # the working directory has no changelog entry; take its
                         # parents from the dirstate instead
                         parents = [cl.rev(p) for p in repo.dirstate.parents()]
                     else:
                         parents = cl.parentrevs(r)
                     for p in parents:
                         if p < 0:
                             # negative revs are skipped (presumably the null
                             # revision - confirm)
                             continue
                         if p not in side:
                             # first time we see p; add it to visit
                             side[p] = side[r]
                             if side[p]:
                                 interesting += 1
                             heapq.heappush(visit, -p)
                         elif side[p] and side[p] != side[r]:
                             # p was interesting but now we know better
                             side[p] = 0
                             interesting -= 1
                             hascommonancestor = True
                     if side[r]:
                         limit = r # lowest rev visited
                         interesting -= 1
                 if not hascommonancestor:
                     return None
                 # Consider the following flow (see test-commit-amend.t under issue4405):
                 # 1/ File 'a0' committed
                 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
                 # 3/ Move back to first commit
                 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
                 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
                 #
                 # During the amend in step five, we will be in this state:
                 #
                 # @  3 temporary amend commit for a1-amend
                 # |
                 # o  2 a1-amend
                 # |
                 # | o  1 a1
                 # |/
                 # o  0 a0
                 #
                 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
                 # yet the filelog has the copy information in rev 1 and we will not look
                 # back far enough unless we also look at the a and b as candidates.
                 # This only occurs when a is a descendant of b or vice versa.
                 return min(limit, a, b)
             def _chain(src, dst, a, b):
                 '''chain two sets of copies a->b

                 a and b are copy mappings; src and dst only need to support
                 membership tests. A new mapping is returned and a is left untouched.
                 '''
                 chained = a.copy()
                 for newname, oldname in b.iteritems():
                     if oldname in chained:
                         # found a chain: oldname already has a recorded origin
                         origin = chained[oldname]
                         if origin != newname:
                             # file wasn't renamed back to itself
                             chained[newname] = origin
                         if oldname not in dst:
                             # chain was a rename, not a copy
                             del chained[oldname]
                     if oldname in src:
                         # file is a copy of an existing file
                         chained[newname] = oldname
                 # remove criss-crossed copies; collect the keys first so the mapping
                 # is not modified while it is being examined
                 crossed = [k for k, v in chained.items() if k in src and v in dst]
                 for k in crossed:
                     del chained[k]
                 return chained
             def _tracefile(fctx, am, limit=-1):
                 '''walk the ancestors of fctx and return the first one whose
                 (path, filenode) pair is present in the ancestor manifest am

                 With a non-negative limit the walk gives up, returning None, at the
                 first ancestor whose linkrev() and rev() are both lower than limit.
                 None is also returned when the ancestors run out.
                 '''
                 bounded = limit >= 0
                 for anc in fctx.ancestors():
                     if am.get(anc.path(), None) == anc.filenode():
                         return anc
                     if not bounded:
                         continue
                     if anc.linkrev() < limit and anc.rev() < limit:
                         return None
                 return None
             def _dirstatecopies(d):
                 """return a copy of the dirstate's copy records for context d,
                 keeping only the entries whose key has dirstate state 'a', 'n' or 'm'
                 """
                 dirstate = d._repo.dirstate
                 copies = dirstate.copies().copy()
                 dropped = [dest for dest in copies.keys()
                            if dirstate[dest] not in 'anm']
                 for dest in dropped:
                     del copies[dest]
                 return copies
             def _computeforwardmissing(a, b, match=None):
                 """Return the files present in b's manifest but absent from a's.

                 When match is given, both manifests are first narrowed with it.
                 Kept as a separate function so extensions can wrap this call to see
                 what files _forwardcopies is about to process.
                 """
                 ma, mb = a.manifest(), b.manifest()
                 if match:
                     ma, mb = ma.matches(match), mb.matches(match)
                 return mb.filesnotin(ma)
             def _forwardcopies(a, b, match=None):
                 '''find {dst@b: src@a} copy mapping where a is an ancestor of b

                 When b is a working context (b.rev() is None) the committed copies are
                 traced up to its first parent and then chained with the copies
                 recorded in the dirstate.
                 '''
                 # check for working copy
                 wctx = None
                 if b.rev() is None:
                     wctx, b = b, b.p1()
                     if a == b:
                         # short-circuit to avoid issues with merge states
                         return _dirstatecopies(wctx)
                 repo = a._repo
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 limit = _findlimit(repo, a.rev(), b.rev())
                 if limit is None:
                     limit = -1
                 basemanifest = a.manifest()
                 # Computing the forward missing is quite expensive on large manifests,
                 # since it compares the entire manifests. The common case of comparing
                 # a commit with its single parent (rebase, histedit) can be narrowed to
                 # the files the commit touched. Merge commits are excluded because
                 # ctx.files() for a merge commit is not correct for this comparison.
                 missingmatch = match
                 plaincommit = (not match and b.p1() == a
                                and b.p2().node() == node.nullid)
                 if plaincommit:
                     missingmatch = scmutil.matchfiles(repo, b.files())
                 missing = _computeforwardmissing(a, b, match=missingmatch)
                 ancestry = repo.changelog.ancestors([b.rev()], inclusive=True)
                 # find where new files came from
                 # we currently don't try to find where old files went, too expensive
                 # this means we can miss a case like 'hg rm b; hg cp a b'
                 copies = {}
                 for fname in missing:
                     fctx = b[fname]
                     fctx._ancestrycontext = ancestry
                     origin = _tracefile(fctx, basemanifest, limit)
                     if origin:
                         copies[fname] = origin.path()
                 # combine copies from dirstate if necessary
                 if wctx is not None:
                     copies = _chain(a, wctx, copies, _dirstatecopies(wctx))
                 return copies
             def _backwardrenames(a, b):
                 """invert the forward copies from b to a, keeping renames only

                 Returns an empty dict when experimental.disablecopytrace is set.
                 """
                 if a._repo.ui.configbool('experimental', 'disablecopytrace'):
                     return {}
                 forward = _forwardcopies(b, a)
                 # Even though we're not taking copies into account, 1:n rename
                 # situations can still exist (e.g. hg cp a b; hg mv a c). In those
                 # cases we arbitrarily pick one of the renames: walking in sorted
                 # order, the last entry for a given source wins.
                 renames = {}
                 for newname, oldname in sorted(forward.iteritems()):
                     # a source that still exists in a was copied, not renamed
                     if oldname not in a:
                         renames[oldname] = newname
                 return renames
             def pathcopies(x, y, match=None):
                 '''find {dst@y: src@x} copy mapping for directed compare

                 Returns an empty dict when x and y are equal or either one is falsy.
                 '''
                 if x == y:
                     return {}
                 if not x or not y:
                     return {}
                 anc = y.ancestor(x)
                 if anc == x:
                     # x is an ancestor of y: plain forward tracing
                     return _forwardcopies(x, y, match=match)
                 if anc == y:
                     # y is an ancestor of x: invert the renames
                     return _backwardrenames(x, y)
                 # otherwise go back from x to the common ancestor, then forward to y
                 backward = _backwardrenames(x, anc)
                 forward = _forwardcopies(anc, y, match=match)
                 return _chain(x, y, backward, forward)
             def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):
                 """Return two sorted lists: the files found only in addedinm1 and the
                 files found only in addedinm2.

                 Each non-empty list is also reported through repo.ui.debug. This is
                 its own function so extensions can easily wrap this call to see what
                 files mergecopies is about to process. c1 and c2 are not used here;
                 they are kept so wrapping extensions can read the file nodes of the
                 changed files.
                 """
                 onlylocal = sorted(addedinm1 - addedinm2)
                 onlyother = sorted(addedinm2 - addedinm1)
                 if onlylocal:
                     listing = "\n   ".join(onlylocal)
                     repo.ui.debug("  unmatched files in local:\n   %s\n" % listing)
                 if onlyother:
                     listing = "\n   ".join(onlyother)
                     repo.ui.debug("  unmatched files in other:\n   %s\n" % listing)
                 return onlylocal, onlyother
             def _makegetfctx(ctx):
                 """return a 'getfctx' function suitable for _checkcopies usage

                 The returned function takes (filename, filenode) and is wrapped in
                 util.lrucachefunc.

                 The function building 'filectx' has to be set up again for each
                 '_checkcopies' call so that linkrev adjustment is properly configured
                 every time. Linkrev adjustment is important to avoid bugs in rename
                 detection, and a proper '_ancestrycontext' keeps its performance
                 impact limited. Without it, each file could do a full dag traversal,
                 making the time complexity of the operation explode (see issue4537).

                 This function exists here mostly to limit the impact on stable. Feel
                 free to refactor on default.
                 """
                 rev = ctx.rev()
                 repo = ctx._repo
                 ancestry = getattr(ctx, '_ancestrycontext', None)
                 if ancestry is None:
                     if rev is None:
                         # no revision number of its own: start from the parents
                         heads = [p.rev() for p in ctx.parents()]
                     else:
                         heads = [rev]
                     ancestry = repo.changelog.ancestors(heads, inclusive=True)
                     ctx._ancestrycontext = ancestry
                 def makectx(f, n):
                     if len(n) == 20:
                         fctx = repo.filectx(f, fileid=n)
                         # setup only needed for filectx not created from a changectx
                         fctx._ancestrycontext = ancestry
                         fctx._descendantrev = rev
                         return fctx
                     # in a working context?
                     if ctx.rev() is None:
                         return ctx.filectx(f)
                     return repo[None][f]
                 return util.lrucachefunc(makectx)
-            def mergecopies(repo, c1, c2, ca):
+            def mergecopies(repo, c1, c2, base):
                 """
                 Find moves and copies between context c1 and c2 that are relevant
-                for merging.
+                for merging. 'base' will be used as the merge base.
                 Returns four dicts: "copy", "movewithdir", "diverge", and
                 "renamedelete".
                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.
                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.
                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.
                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return {}, {}, {}, {}
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return repo.dirstate.copies(), {}, {}, {}
                 # Copy trace disabling is explicitly below the node == p1 logic above
                 # because the logic above is required for a simple copy to be kept across a
                 # rebase.
                 if repo.ui.configbool('experimental', 'disablecopytrace'):
                     return {}, {}, {}, {}
                 limit = _findlimit(repo, c1.rev(), c2.rev())
                 if limit is None:
                     # no common ancestor, no copies
                     return {}, {}, {}, {}
                 repo.ui.debug("  searching for copies back to rev %d\n" % limit)
                 m1 = c1.manifest()
                 m2 = c2.manifest()
-                ma = ca.manifest()
+                mb = base.manifest()
                 # gather data from _checkcopies:
                 # - diverge = record all diverges in this dict
                 # - copy = record all non-divergent copies in this dict
                 # - fullcopy = record all copies in this dict
                 diverge = {} # divergence data is shared
                 data1 = {'copy': {},
                          'fullcopy': {},
                          'diverge': diverge,
                         }
                 data2 = {'copy': {},
                          'fullcopy': {},
                          'diverge': diverge,
                         }
                 # find interesting file sets from manifests
-                addedinm1 = m1.filesnotin(ma)
+                addedinm1 = m1.filesnotin(mb)
-                addedinm2 = m2.filesnotin(ma)
+                addedinm2 = m2.filesnotin(mb)
                 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
                 u1u, u2u = u1r, u2r
                 bothnew = sorted(addedinm1 & addedinm2)
                 for f in u1u:
-                    _checkcopies(c1, f, m1, m2, ca, limit, data1)
+                    _checkcopies(c1, f, m1, m2, base, limit, data1)
                 for f in u2u:
-                    _checkcopies(c2, f, m2, m1, ca, limit, data2)
+                    _checkcopies(c2, f, m2, m1, base, limit, data2)
                 copy = dict(data1['copy'].items() + data2['copy'].items())
                 fullcopy = dict(data1['fullcopy'].items() + data2['fullcopy'].items())
                 renamedelete = {}
                 renamedeleteset = set()
                 divergeset = set()
                 for of, fl in diverge.items():
                     if len(fl) == 1 or of in c1 or of in c2:
                         del diverge[of] # not actually divergent, or not a rename
                         if of not in c1 and of not in c2:
                             # renamed on one side, deleted on the other side, but filter
                             # out files that have been renamed and then deleted
                             renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                             renamedeleteset.update(fl) # reverse map for below
                     else:
                         divergeset.update(fl) # reverse map for below
                 if bothnew:
                     repo.ui.debug("  unmatched files new in both:\n   %s\n"
                                   % "\n   ".join(bothnew))
                 bothdiverge = {}
                 bothdata = {'copy': {},
                             'fullcopy': {},
                             'diverge': bothdiverge,
                            }
                 for f in bothnew:
-                    _checkcopies(c1, f, m1, m2, ca, limit, bothdata)
+                    _checkcopies(c1, f, m1, m2, base, limit, bothdata)
-                    _checkcopies(c2, f, m2, m1, ca, limit, bothdata)
+                    _checkcopies(c2, f, m2, m1, base, limit, bothdata)
                 for of, fl in bothdiverge.items():
                     if len(fl) == 2 and fl[0] == fl[1]:
                         copy[fl[0]] = of # not actually divergent, just matching renames
                 if fullcopy and repo.ui.debugflag:
                     repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                                   "% = renamed and deleted):\n")
                     for f in sorted(fullcopy):
                         note = ""
                         if f in copy:
                             note += "*"
                         if f in divergeset:
                             note += "!"
                         if f in renamedeleteset:
                             note += "%"
                         repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                                           note))
                 del divergeset
                 if not fullcopy:
                     return copy, {}, diverge, renamedelete
                 repo.ui.debug("  checking for directory renames\n")
                 # generate a directory move map
                 d1, d2 = c1.dirs(), c2.dirs()
                 # Hack for adding '', which is not otherwise added, to d1 and d2
                 d1.addpath('/')
                 d2.addpath('/')
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in fullcopy.iteritems():
                     dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif dsrc in d1 and ddst in d1:
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc + "/")
                     elif dsrc in d2 and ddst in d2:
                         # directory wasn't entirely moved remotely
                         invalid.add(dsrc + "/")
                     elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
                         # files from the same directory moved to two different places
                         invalid.add(dsrc + "/")
                     else:
                         # looks good so far
                         dirmove[dsrc + "/"] = ddst + "/"
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del d1, d2, invalid
                 if not dirmove:
                     return copy, {}, diverge, renamedelete
                 for d in dirmove:
                     repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                                   (d, dirmove[d]))
                 movewithdir = {}
                 # check unaccounted nonoverlapping files against directory moves
                 for f in u1r + u2r:
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d):]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(("   pending file src: '%s' -> "
                                                    "dst: '%s'\n") % (f, df))
                                 break
                 return copy, movewithdir, diverge, renamedelete
             def _related(f1, f2, limit):
                 """return True if f1 and f2 filectx have a common ancestor
                 Walk back to common ancestor to see if the two files originate
                 from the same file. Since workingfilectx's rev() is None it messes
                 up the integer comparison logic, hence the pre-step check for
                 None (f1 and f2 can only be workingfilectx's initially).
                 """
                 if f1 == f2:
                     return f1 # a match
                 g1, g2 = f1.ancestors(), f2.ancestors()
                 try:
                     f1r, f2r = f1.linkrev(), f2.linkrev()
                     if f1r is None:
                         f1 = next(g1)
                     if f2r is None:
                         f2 = next(g2)
                     while True:
                         f1r, f2r = f1.linkrev(), f2.linkrev()
                         if f1r > f2r:
                             f1 = next(g1)
                         elif f2r > f1r:
                             f2 = next(g2)
                         elif f1 == f2:
                             return f1 # a match
                         elif f1r == f2r or f1r < limit or f2r < limit:
                             return False # copy no longer relevant
                 except StopIteration:
                     return False
             def _checkcopies(ctx, f, m1, m2, base, limit, data):
                 """
                 check possible copies of f from m1 to m2

                 ctx = starting context for f in m1
                 f = the filename to check (as in m1)
                 m1 = the source manifest
                 m2 = the destination manifest
                 base = the changectx used as a merge base
                 limit = the rev number to not search beyond
                 data = dictionary of dictionary to store copy data; the 'copy',
                        'fullcopy' and 'diverge' entries are updated in place
                        (see mergecopies)

                 Nothing is returned; all results are recorded in data.

                 note: limit is only an optimization, and there is no guarantee that
                 irrelevant revisions will not be visited
                 there is no easy way to make this algorithm stop in a guaranteed way
                 once it "goes behind a certain revision".
                 """
                 mb = base.manifest()
                 getfctx = _makegetfctx(ctx)
                 # 'of' tracks the path of the ancestor currently examined; it stays
                 # None when f has no ancestors at all
                 of = None
                 seen = set([f])
                 for oc in getfctx(f, m1[f]).ancestors():
                     ocr = oc.linkrev()
                     of = oc.path()
                     if of in seen:
                         # check limit late - grab last rename before
                         if ocr < limit:
                             break
                         continue
                     seen.add(of)
                     data['fullcopy'][f] = of # remember for dir rename detection
                     if of not in m2:
                         continue # no match, keep looking
                     if m2[of] == mb.get(of):
                         return # no merge needed, quit early
                     c2 = getfctx(of, m2[of])
                     # c2 might be a plain new file added on the destination side that
                     # is unrelated to the droids we are looking for.
                     cr = _related(oc, c2, base.rev())
                     if cr and (of == f or of == c2.path()): # non-divergent
                         data['copy'][f] = of
                         return
                 # reached only when the walk ended (ancestors exhausted or limit hit)
                 # without returning: if the last path seen exists in the merge base,
                 # record f as a candidate divergent rename of it
                 if of in mb:
                     data['diverge'].setdefault(of, []).append(f)
             def duplicatecopies(repo, rev, fromrev, skiprev=None):
                 '''reproduce copies from fromrev to rev in the dirstate
                 If skiprev is specified, it's a revision that should be used to
                 filter copy records. Any copies that occur between fromrev and
                 skiprev will not be duplicated, even if they appear in the set of
                 copies between fromrev and rev.
                 '''
                 exclude = {}
                 if (skiprev is not None and
                     not repo.ui.configbool('experimental', 'disablecopytrace')):
                     # disablecopytrace skips this line, but not the entire function because
                     # the line below is O(size of the repo) during a rebase, while the rest
                     # of the function is much faster (and is required for carrying copy
                     # metadata across the rebase anyway).
                     exclude = pathcopies(repo[fromrev], repo[skiprev])
                 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
                     # copies.pathcopies returns backward renames, so dst might not
                     # actually be in the dirstate
                     if dst in exclude:
                         continue
                     if repo.dirstate[dst] in "nma":
                         repo.dirstate.copy(src, dst)