##// END OF EJS Templates
copies: compute a suitable TCA if base turns out to be unsuitable...
Gábor Stefanik -
r30194:8c69c52c default
parent child Browse files
Show More
@@ -1,592 +1,596 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import heapq
10 import heapq
11
11
12 from . import (
12 from . import (
13 node,
13 node,
14 pathutil,
14 pathutil,
15 scmutil,
15 scmutil,
16 util,
16 util,
17 )
17 )
18
18
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.
    None if no such revision exists.

    'a' and 'b' are revision numbers; either may be None to mean the
    working directory (represented internally by the pseudo-rev len(cl)).
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    # on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    # - add unseen parents to side map
    # - clear side of any parent that has children on different sides
    # - track number of interesting revs that might still be on a side
    # - track the lowest interesting rev seen
    # - quit when interesting revs is zero

    cl = repo.changelog
    working = len(cl) # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    # side[rev]: -1 = only reachable from a, 1 = only from b, 0 = both
    side = {a: -1, b: 1}
    # revs are pushed negated so the min-heap pops the highest revision
    # first, yielding a descendant-before-ancestor (topological) walk
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            # the working directory's parents come from the dirstate,
            # not the changelog
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                # nullrev parent: nothing to propagate
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @ 3 temporary amend commit for a1-amend
    # |
    # o 2 a1-amend
    # |
    # | o 1 a1
    # |/
    # o 0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(limit, a, b)
103
103
104 def _chain(src, dst, a, b):
104 def _chain(src, dst, a, b):
105 '''chain two sets of copies a->b'''
105 '''chain two sets of copies a->b'''
106 t = a.copy()
106 t = a.copy()
107 for k, v in b.iteritems():
107 for k, v in b.iteritems():
108 if v in t:
108 if v in t:
109 # found a chain
109 # found a chain
110 if t[v] != k:
110 if t[v] != k:
111 # file wasn't renamed back to itself
111 # file wasn't renamed back to itself
112 t[k] = t[v]
112 t[k] = t[v]
113 if v not in dst:
113 if v not in dst:
114 # chain was a rename, not a copy
114 # chain was a rename, not a copy
115 del t[v]
115 del t[v]
116 if v in src:
116 if v in src:
117 # file is a copy of an existing file
117 # file is a copy of an existing file
118 t[k] = v
118 t[k] = v
119
119
120 # remove criss-crossed copies
120 # remove criss-crossed copies
121 for k, v in t.items():
121 for k, v in t.items():
122 if k in src and v in dst:
122 if k in src and v in dst:
123 del t[k]
123 del t[k]
124
124
125 return t
125 return t
126
126
127 def _tracefile(fctx, am, limit=-1):
127 def _tracefile(fctx, am, limit=-1):
128 '''return file context that is the ancestor of fctx present in ancestor
128 '''return file context that is the ancestor of fctx present in ancestor
129 manifest am, stopping after the first ancestor lower than limit'''
129 manifest am, stopping after the first ancestor lower than limit'''
130
130
131 for f in fctx.ancestors():
131 for f in fctx.ancestors():
132 if am.get(f.path(), None) == f.filenode():
132 if am.get(f.path(), None) == f.filenode():
133 return f
133 return f
134 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
134 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
135 return None
135 return None
136
136
137 def _dirstatecopies(d):
137 def _dirstatecopies(d):
138 ds = d._repo.dirstate
138 ds = d._repo.dirstate
139 c = ds.copies().copy()
139 c = ds.copies().copy()
140 for k in c.keys():
140 for k in c.keys():
141 if ds[k] not in 'anm':
141 if ds[k] not in 'anm':
142 del c[k]
142 del c[k]
143 return c
143 return c
144
144
145 def _computeforwardmissing(a, b, match=None):
145 def _computeforwardmissing(a, b, match=None):
146 """Computes which files are in b but not a.
146 """Computes which files are in b but not a.
147 This is its own function so extensions can easily wrap this call to see what
147 This is its own function so extensions can easily wrap this call to see what
148 files _forwardcopies is about to process.
148 files _forwardcopies is about to process.
149 """
149 """
150 ma = a.manifest()
150 ma = a.manifest()
151 mb = b.manifest()
151 mb = b.manifest()
152 if match:
152 if match:
153 ma = ma.matches(match)
153 ma = ma.matches(match)
154 mb = mb.matches(match)
154 mb = mb.matches(match)
155 return mb.filesnotin(ma)
155 return mb.filesnotin(ma)
156
156
def _forwardcopies(a, b, match=None):
    '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

    # working copy: trace against b's first parent and fold the dirstate
    # copies back in at the end
    wctx = None
    if b.rev() is None:
        wctx = b
        b = wctx.p1()
        if a == b:
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(wctx)

    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        limit = -1
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests,
    # since it compares the entire manifests. We can optimize it in the common
    # use case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if not match and b.p1() == a and b.p2().node() == node.nullid:
        forwardmissingmatch = scmutil.matchfiles(a._repo, b.files())
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # share one ancestry context across all traced files so linkrev
    # adjustment stays cheap (see _makegetfctx for the rationale)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in missing:
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            cm[f] = ofctx.path()

    # combine copies from dirstate if necessary
    if wctx is not None:
        cm = _chain(a, wctx, cm, _dirstatecopies(wctx))

    return cm
205
205
def _backwardrenames(a, b):
    """Return the rename map {src@b: dst@a} for the backward direction.

    Computed by inverting the forward copy map from b to a; entries whose
    source still exists in 'a' (i.e. copies rather than renames) are
    dropped.
    """
    if a._repo.ui.configbool('experimental', 'disablecopytrace'):
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    f = _forwardcopies(b, a)
    r = {}
    # items() instead of iteritems(): Python 3 compatible, and sorted()
    # materializes the pairs anyway; sorting keeps the arbitrary 1:n pick
    # deterministic
    for k, v in sorted(f.items()):
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
221
221
def pathcopies(x, y, match=None):
    '''find {dst@y: src@x} copy mapping for directed compare'''
    # degenerate cases: identical or missing endpoints carry no copies
    if x == y or not x or not y:
        return {}
    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: a plain forward walk suffices
        return _forwardcopies(x, y, match=match)
    if anc == y:
        # y is an ancestor of x: a plain backward walk suffices
        return _backwardrenames(x, y)
    # otherwise walk back from x to the common ancestor, then forward to y
    return _chain(x, y, _backwardrenames(x, anc),
                  _forwardcopies(anc, y, match=match))
233
233
234 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):
234 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):
235 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
235 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
236 and c2. This is its own function so extensions can easily wrap this call
236 and c2. This is its own function so extensions can easily wrap this call
237 to see what files mergecopies is about to process.
237 to see what files mergecopies is about to process.
238
238
239 Even though c1 and c2 are not used in this function, they are useful in
239 Even though c1 and c2 are not used in this function, they are useful in
240 other extensions for being able to read the file nodes of the changed files.
240 other extensions for being able to read the file nodes of the changed files.
241 """
241 """
242 u1 = sorted(addedinm1 - addedinm2)
242 u1 = sorted(addedinm1 - addedinm2)
243 u2 = sorted(addedinm2 - addedinm1)
243 u2 = sorted(addedinm2 - addedinm1)
244
244
245 if u1:
245 if u1:
246 repo.ui.debug(" unmatched files in local:\n %s\n"
246 repo.ui.debug(" unmatched files in local:\n %s\n"
247 % "\n ".join(u1))
247 % "\n ".join(u1))
248 if u2:
248 if u2:
249 repo.ui.debug(" unmatched files in other:\n %s\n"
249 repo.ui.debug(" unmatched files in other:\n %s\n"
250 % "\n ".join(u2))
250 % "\n ".join(u2))
251 return u1, u2
251 return u1, u2
252
252
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        # compute the ancestry context once and cache it on ctx so every
        # file traced from this ctx shares it
        revs = [rev]
        if rev is None:
            # working context has no rev; use its parents' revs instead
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        # 'n' is a filenode; anything that isn't a full 20-byte binary
        # node is taken to refer to the working context
        if len(n) != 20: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    # memoize: the same (f, n) pair is frequently requested repeatedly
    return util.lrucachefunc(makectx)
287
287
def mergecopies(repo, c1, c2, base):
    """
    Find moves and copies between context c1 and c2 that are relevant
    for merging. 'base' will be used as the merge base.

    Returns four dicts: "copy", "movewithdir", "diverge", and
    "renamedelete".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if repo.ui.configbool('experimental', 'disablecopytrace'):
        return {}, {}, {}, {}

    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden We still need to know a real common ancestor in this case We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.descendant(wc) and base.descendant(base) are False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not (base == _c1 or base.descendant(_c1))
    dirtyc2 = not (base == _c2 or base.descendant(_c2))
    graft = dirtyc1 or dirtyc2
    # NOTE(review): tca (topological common ancestor) is computed but not
    # yet consumed below -- presumably groundwork for graft-aware copy
    # tracing; confirm before removing
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    diverge = {} # divergence data is shared
    data1 = {'copy': {},
             'fullcopy': {},
             'diverge': diverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'diverge': diverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb)
    addedinm2 = m2.filesnotin(mb)
    u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
    u1u, u2u = u1r, u2r
    bothnew = sorted(addedinm1 & addedinm2)

    for f in u1u:
        _checkcopies(c1, f, m1, m2, base, limit, data1)

    for f in u2u:
        _checkcopies(c2, f, m2, m1, base, limit, data2)

    # merge the two sides' results; dict view concatenation
    # (dict(a.items() + b.items())) is Python-2-only, update() works on both
    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    # iterate over a snapshot: entries are deleted from 'diverge' inside
    # the loop, which would raise RuntimeError on Python 3
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothdata = {'copy': {},
                'fullcopy': {},
                'diverge': bothdiverge,
               }
    for f in bothnew:
        _checkcopies(c1, f, m1, m2, base, limit, bothdata)
        _checkcopies(c2, f, m2, m1, base, limit, bothdata)
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.items():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc + "/")
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc + "/")
        elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
            # files from the same directory moved to two different places
            invalid.add(dsrc + "/")
        else:
            # looks good so far
            dirmove[dsrc + "/"] = ddst + "/"

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete
485
489
486 def _related(f1, f2, limit):
490 def _related(f1, f2, limit):
487 """return True if f1 and f2 filectx have a common ancestor
491 """return True if f1 and f2 filectx have a common ancestor
488
492
489 Walk back to common ancestor to see if the two files originate
493 Walk back to common ancestor to see if the two files originate
490 from the same file. Since workingfilectx's rev() is None it messes
494 from the same file. Since workingfilectx's rev() is None it messes
491 up the integer comparison logic, hence the pre-step check for
495 up the integer comparison logic, hence the pre-step check for
492 None (f1 and f2 can only be workingfilectx's initially).
496 None (f1 and f2 can only be workingfilectx's initially).
493 """
497 """
494
498
495 if f1 == f2:
499 if f1 == f2:
496 return f1 # a match
500 return f1 # a match
497
501
498 g1, g2 = f1.ancestors(), f2.ancestors()
502 g1, g2 = f1.ancestors(), f2.ancestors()
499 try:
503 try:
500 f1r, f2r = f1.linkrev(), f2.linkrev()
504 f1r, f2r = f1.linkrev(), f2.linkrev()
501
505
502 if f1r is None:
506 if f1r is None:
503 f1 = next(g1)
507 f1 = next(g1)
504 if f2r is None:
508 if f2r is None:
505 f2 = next(g2)
509 f2 = next(g2)
506
510
507 while True:
511 while True:
508 f1r, f2r = f1.linkrev(), f2.linkrev()
512 f1r, f2r = f1.linkrev(), f2.linkrev()
509 if f1r > f2r:
513 if f1r > f2r:
510 f1 = next(g1)
514 f1 = next(g1)
511 elif f2r > f1r:
515 elif f2r > f1r:
512 f2 = next(g2)
516 f2 = next(g2)
513 elif f1 == f2:
517 elif f1 == f2:
514 return f1 # a match
518 return f1 # a match
515 elif f1r == f2r or f1r < limit or f2r < limit:
519 elif f1r == f2r or f1r < limit or f2r < limit:
516 return False # copy no longer relevant
520 return False # copy no longer relevant
517 except StopIteration:
521 except StopIteration:
518 return False
522 return False
519
523
520 def _checkcopies(ctx, f, m1, m2, base, limit, data):
524 def _checkcopies(ctx, f, m1, m2, base, limit, data):
521 """
525 """
522 check possible copies of f from m1 to m2
526 check possible copies of f from m1 to m2
523
527
524 ctx = starting context for f in m1
528 ctx = starting context for f in m1
525 f = the filename to check (as in m1)
529 f = the filename to check (as in m1)
526 m1 = the source manifest
530 m1 = the source manifest
527 m2 = the destination manifest
531 m2 = the destination manifest
528 base = the changectx used as a merge base
532 base = the changectx used as a merge base
529 limit = the rev number to not search beyond
533 limit = the rev number to not search beyond
530 data = dictionary of dictionary to store copy data. (see mergecopies)
534 data = dictionary of dictionary to store copy data. (see mergecopies)
531
535
532 note: limit is only an optimization, and there is no guarantee that
536 note: limit is only an optimization, and there is no guarantee that
533 irrelevant revisions will not be limited
537 irrelevant revisions will not be limited
534 there is no easy way to make this algorithm stop in a guaranteed way
538 there is no easy way to make this algorithm stop in a guaranteed way
535 once it "goes behind a certain revision".
539 once it "goes behind a certain revision".
536 """
540 """
537
541
538 mb = base.manifest()
542 mb = base.manifest()
539 getfctx = _makegetfctx(ctx)
543 getfctx = _makegetfctx(ctx)
540
544
541 of = None
545 of = None
542 seen = set([f])
546 seen = set([f])
543 for oc in getfctx(f, m1[f]).ancestors():
547 for oc in getfctx(f, m1[f]).ancestors():
544 ocr = oc.linkrev()
548 ocr = oc.linkrev()
545 of = oc.path()
549 of = oc.path()
546 if of in seen:
550 if of in seen:
547 # check limit late - grab last rename before
551 # check limit late - grab last rename before
548 if ocr < limit:
552 if ocr < limit:
549 break
553 break
550 continue
554 continue
551 seen.add(of)
555 seen.add(of)
552
556
553 data['fullcopy'][f] = of # remember for dir rename detection
557 data['fullcopy'][f] = of # remember for dir rename detection
554 if of not in m2:
558 if of not in m2:
555 continue # no match, keep looking
559 continue # no match, keep looking
556 if m2[of] == mb.get(of):
560 if m2[of] == mb.get(of):
557 return # no merge needed, quit early
561 return # no merge needed, quit early
558 c2 = getfctx(of, m2[of])
562 c2 = getfctx(of, m2[of])
559 # c2 might be a plain new file on added on destination side that is
563 # c2 might be a plain new file on added on destination side that is
560 # unrelated to the droids we are looking for.
564 # unrelated to the droids we are looking for.
561 cr = _related(oc, c2, base.rev())
565 cr = _related(oc, c2, base.rev())
562 if cr and (of == f or of == c2.path()): # non-divergent
566 if cr and (of == f or of == c2.path()): # non-divergent
563 if of in mb:
567 if of in mb:
564 data['copy'][f] = of
568 data['copy'][f] = of
565 return
569 return
566
570
567 if of in mb:
571 if of in mb:
568 data['diverge'].setdefault(of, []).append(f)
572 data['diverge'].setdefault(of, []).append(f)
569
573
570 def duplicatecopies(repo, rev, fromrev, skiprev=None):
574 def duplicatecopies(repo, rev, fromrev, skiprev=None):
571 '''reproduce copies from fromrev to rev in the dirstate
575 '''reproduce copies from fromrev to rev in the dirstate
572
576
573 If skiprev is specified, it's a revision that should be used to
577 If skiprev is specified, it's a revision that should be used to
574 filter copy records. Any copies that occur between fromrev and
578 filter copy records. Any copies that occur between fromrev and
575 skiprev will not be duplicated, even if they appear in the set of
579 skiprev will not be duplicated, even if they appear in the set of
576 copies between fromrev and rev.
580 copies between fromrev and rev.
577 '''
581 '''
578 exclude = {}
582 exclude = {}
579 if (skiprev is not None and
583 if (skiprev is not None and
580 not repo.ui.configbool('experimental', 'disablecopytrace')):
584 not repo.ui.configbool('experimental', 'disablecopytrace')):
581 # disablecopytrace skips this line, but not the entire function because
585 # disablecopytrace skips this line, but not the entire function because
582 # the line below is O(size of the repo) during a rebase, while the rest
586 # the line below is O(size of the repo) during a rebase, while the rest
583 # of the function is much faster (and is required for carrying copy
587 # of the function is much faster (and is required for carrying copy
584 # metadata across the rebase anyway).
588 # metadata across the rebase anyway).
585 exclude = pathcopies(repo[fromrev], repo[skiprev])
589 exclude = pathcopies(repo[fromrev], repo[skiprev])
586 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
590 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
587 # copies.pathcopies returns backward renames, so dst might not
591 # copies.pathcopies returns backward renames, so dst might not
588 # actually be in the dirstate
592 # actually be in the dirstate
589 if dst in exclude:
593 if dst in exclude:
590 continue
594 continue
591 if repo.dirstate[dst] in "nma":
595 if repo.dirstate[dst] in "nma":
592 repo.dirstate.copy(src, dst)
596 repo.dirstate.copy(src, dst)
General Comments 0
You need to be logged in to leave comments. Login now