##// END OF EJS Templates
narrow: fix copies._fullcopytracing() narrowspec filtering in graft case...
Martin von Zweigbergk -
r40489:07a66c13 stable
parent child Browse files
Show More
@@ -1,922 +1,923
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 scmutil,
20 scmutil,
21 util,
21 util,
22 )
22 )
23 from .utils import (
23 from .utils import (
24 stringutil,
24 stringutil,
25 )
25 )
26
26
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.
    None if no such revision exists.
    """

    # Strategy: tag a and b with opposite "sides" (-1 / +1), then sweep the
    # graph from highest to lowest revision using a max-heap (negated revnums
    # in heapq's min-heap).  A parent inherits its child's side on first
    # visit; if children from both sides reach it, the parent is neutralized
    # (side 0) and we know a common ancestor exists.  We track how many
    # still-interesting (side-tagged) revisions remain and the lowest tagged
    # revision seen, and stop once nothing interesting is left.
    cl = repo.changelog
    working = len(cl)  # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    side = {a: -1, b: 1}
    heap = [-a, -b]
    heapq.heapify(heap)
    pending = len(heap)
    seencommon = False
    lowest = working

    while pending:
        rev = -heapq.heappop(heap)
        if rev == working:
            parentrevs = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parentrevs = cl.parentrevs(rev)
        for parent in parentrevs:
            if parent < 0:
                continue
            if parent not in side:
                # first time we see this parent; it inherits the child's side
                side[parent] = side[rev]
                if side[parent]:
                    pending += 1
                heapq.heappush(heap, -parent)
            elif side[parent] and side[parent] != side[rev]:
                # parent was interesting but is reachable from both sides
                side[parent] = 0
                pending -= 1
                seencommon = True
        if side[rev]:
            lowest = rev  # lowest rev visited
            pending -= 1

    if not seencommon:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, a and b are revs 3 and 0, so the sweep
    # computes a limit of 2, yet the filelog has the copy information in rev 1
    # and we would not look back far enough unless we also consider a and b
    # themselves as candidates.  This only occurs when a is a descendent of b
    # or visa-versa, hence the min() below.
    return min(lowest, a, b)
111
111
112 def _chain(src, dst, a, b):
112 def _chain(src, dst, a, b):
113 """chain two sets of copies a->b"""
113 """chain two sets of copies a->b"""
114 t = a.copy()
114 t = a.copy()
115 for k, v in b.iteritems():
115 for k, v in b.iteritems():
116 if v in t:
116 if v in t:
117 # found a chain
117 # found a chain
118 if t[v] != k:
118 if t[v] != k:
119 # file wasn't renamed back to itself
119 # file wasn't renamed back to itself
120 t[k] = t[v]
120 t[k] = t[v]
121 if v not in dst:
121 if v not in dst:
122 # chain was a rename, not a copy
122 # chain was a rename, not a copy
123 del t[v]
123 del t[v]
124 if v in src:
124 if v in src:
125 # file is a copy of an existing file
125 # file is a copy of an existing file
126 t[k] = v
126 t[k] = v
127
127
128 # remove criss-crossed copies
128 # remove criss-crossed copies
129 for k, v in list(t.items()):
129 for k, v in list(t.items()):
130 if k in src and v in dst:
130 if k in src and v in dst:
131 del t[k]
131 del t[k]
132
132
133 return t
133 return t
134
134
135 def _tracefile(fctx, am, limit=-1):
135 def _tracefile(fctx, am, limit=-1):
136 """return file context that is the ancestor of fctx present in ancestor
136 """return file context that is the ancestor of fctx present in ancestor
137 manifest am, stopping after the first ancestor lower than limit"""
137 manifest am, stopping after the first ancestor lower than limit"""
138
138
139 for f in fctx.ancestors():
139 for f in fctx.ancestors():
140 if am.get(f.path(), None) == f.filenode():
140 if am.get(f.path(), None) == f.filenode():
141 return f
141 return f
142 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
142 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
143 return None
143 return None
144
144
145 def _dirstatecopies(d, match=None):
145 def _dirstatecopies(d, match=None):
146 ds = d._repo.dirstate
146 ds = d._repo.dirstate
147 c = ds.copies().copy()
147 c = ds.copies().copy()
148 for k in list(c):
148 for k in list(c):
149 if ds[k] not in 'anm' or (match and not match(k)):
149 if ds[k] not in 'anm' or (match and not match(k)):
150 del c[k]
150 del c[k]
151 return c
151 return c
152
152
153 def _computeforwardmissing(a, b, match=None):
153 def _computeforwardmissing(a, b, match=None):
154 """Computes which files are in b but not a.
154 """Computes which files are in b but not a.
155 This is its own function so extensions can easily wrap this call to see what
155 This is its own function so extensions can easily wrap this call to see what
156 files _forwardcopies is about to process.
156 files _forwardcopies is about to process.
157 """
157 """
158 ma = a.manifest()
158 ma = a.manifest()
159 mb = b.manifest()
159 mb = b.manifest()
160 return mb.filesnotin(ma, match=match)
160 return mb.filesnotin(ma, match=match)
161
161
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: looking into rename from %s to %s\n'
            % (a, b))
    limit = _findlimit(repo, a.rev(), b.rev())
    if limit is None:
        limit = -1
    if debug:
        dbg('debug.copies: search limit: %d\n' % limit)
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    copies = {}

    # Computing the forward missing is quite expensive on large manifests,
    # since it compares the entire manifests.  We can optimize it in the
    # common case of computing what copies are in a commit versus its parent
    # (like during a rebase or histedit).  Merge commits are excluded from
    # this optimization, since ctx.files() for a merge commit is not correct
    # for this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # share one ancestry context across all traced files to keep linkrev
    # adjustment cheap (see _makegetfctx for the rationale)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg('debug.copies: missing file to search: %d\n' % len(missing))

    for f in missing:
        if debug:
            dbg('debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            if debug:
                dbg('debug.copies: rename of: %s\n' % ofctx._path)
            copies[f] = ofctx.path()
        if debug:
            dbg('debug.copies: time: %f seconds\n'
                % (util.timer() - start))
    return copies
218
218
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        if a == b.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(b, match)

        # copies up to the working copy's parent, then chain in whatever the
        # dirstate has recorded on top of that
        committed = _committedforwardcopies(a, b.p1(), match)
        return _chain(a, b, committed, _dirstatecopies(b, match))
    return _committedforwardcopies(a, b, match)
233
233
def _backwardrenames(a, b):
    """find {src@b: dst@a} rename mapping where b is an ancestor of a,
    expressed by inverting the forward copies and dropping plain copies"""
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames (sorted() makes the pick stable).
    forward = _forwardcopies(b, a)
    backward = {}
    for dst, src in sorted(forward.iteritems()):
        if src in a:
            # source still exists in a: this was a copy, not a rename
            continue
        backward[src] = dst
    return backward
249
249
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    if debug:
        repo.ui.debug('debug.copies: searching copies from %s to %s\n'
                      % (x, y))
    if x == y or not x or not y:
        return {}
    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: walk copies forward
        if debug:
            repo.ui.debug('debug.copies: search mode: forward\n')
        return _forwardcopies(x, y, match=match)
    if anc == y:
        # y is an ancestor of x: walk renames backward
        if debug:
            repo.ui.debug('debug.copies: search mode: backward\n')
        return _backwardrenames(x, y)
    # unrelated endpoints: go back to the common ancestor, then forward
    if debug:
        repo.ui.debug('debug.copies: search mode: combined\n')
    return _chain(x, y, _backwardrenames(x, anc),
                  _forwardcopies(anc, y, match=match))
272
272
273 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
273 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
274 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
274 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
275 and c2. This is its own function so extensions can easily wrap this call
275 and c2. This is its own function so extensions can easily wrap this call
276 to see what files mergecopies is about to process.
276 to see what files mergecopies is about to process.
277
277
278 Even though c1 and c2 are not used in this function, they are useful in
278 Even though c1 and c2 are not used in this function, they are useful in
279 other extensions for being able to read the file nodes of the changed files.
279 other extensions for being able to read the file nodes of the changed files.
280
280
281 "baselabel" can be passed to help distinguish the multiple computations
281 "baselabel" can be passed to help distinguish the multiple computations
282 done in the graft case.
282 done in the graft case.
283 """
283 """
284 u1 = sorted(addedinm1 - addedinm2)
284 u1 = sorted(addedinm1 - addedinm2)
285 u2 = sorted(addedinm2 - addedinm1)
285 u2 = sorted(addedinm2 - addedinm1)
286
286
287 header = " unmatched files in %s"
287 header = " unmatched files in %s"
288 if baselabel:
288 if baselabel:
289 header += ' (from %s)' % baselabel
289 header += ' (from %s)' % baselabel
290 if u1:
290 if u1:
291 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
291 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
292 if u2:
292 if u2:
293 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
293 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
294
294
295 return u1, u2
295 return u1, u2
296
296
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        # build (and cache on ctx) the ancestry set used for linkrev
        # adjustment; for the working directory, start from its parents
        revs = [rev]
        if rev is None:
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac

    def makectx(f, n):
        if n in node.wdirfilenodeids:  # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx

    return util.lrucachefunc(makectx)
331
331
332 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
332 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
333 """combine partial copy paths"""
333 """combine partial copy paths"""
334 remainder = {}
334 remainder = {}
335 for f in copyfrom:
335 for f in copyfrom:
336 if f in copyto:
336 if f in copyto:
337 finalcopy[copyto[f]] = copyfrom[f]
337 finalcopy[copyto[f]] = copyfrom[f]
338 del copyto[f]
338 del copyto[f]
339 for f in incompletediverge:
339 for f in incompletediverge:
340 assert f not in diverge
340 assert f not in diverge
341 ic = incompletediverge[f]
341 ic = incompletediverge[f]
342 if ic[0] in copyto:
342 if ic[0] in copyto:
343 diverge[f] = [copyto[ic[0]], ic[1]]
343 diverge[f] = [copyto[ic[0]], ic[1]]
344 else:
344 else:
345 remainder[f] = ic
345 remainder[f] = ic
346 return remainder
346 return remainder
347
347
def mergecopies(repo, c1, c2, base):
    """
    The function calling different copytracing algorithms on the basis of config
    which find moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')
    boolctrace = stringutil.parsebool(copytracing)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    if boolctrace is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}
    return _fullcopytracing(repo, c1, c2, base)
420
420
def _isfullcopytraceable(repo, c1, base):
    """ Checks that if base, source and destination are all no-public branches,
    if yes let's use the full copytrace algorithm for increased capabilities
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        # working directory: judge by its parent instead
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        # a public endpoint means the range may be large; stay on heuristics
        return False
    sourcecommitlimit = repo.ui.configint('experimental',
                                          'copytrace.sourcecommitlimit')
    commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
    return commits < sourcecommitlimit
438
438
439 def _fullcopytracing(repo, c1, c2, base):
439 def _fullcopytracing(repo, c1, c2, base):
440 """ The full copytracing algorithm which finds all the new files that were
440 """ The full copytracing algorithm which finds all the new files that were
441 added from merge base up to the top commit and for each file it checks if
441 added from merge base up to the top commit and for each file it checks if
442 this file was copied from another file.
442 this file was copied from another file.
443
443
444 This is pretty slow when a lot of changesets are involved but will track all
444 This is pretty slow when a lot of changesets are involved but will track all
445 the copies.
445 the copies.
446 """
446 """
447 # In certain scenarios (e.g. graft, update or rebase), base can be
447 # In certain scenarios (e.g. graft, update or rebase), base can be
448 # overridden We still need to know a real common ancestor in this case We
448 # overridden We still need to know a real common ancestor in this case We
449 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
449 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
450 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
450 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
451 # caller may not know if the revision passed in lieu of the CA is a genuine
451 # caller may not know if the revision passed in lieu of the CA is a genuine
452 # common ancestor or not without explicitly checking it, it's better to
452 # common ancestor or not without explicitly checking it, it's better to
453 # determine that here.
453 # determine that here.
454 #
454 #
455 # base.isancestorof(wc) is False, work around that
455 # base.isancestorof(wc) is False, work around that
456 _c1 = c1.p1() if c1.rev() is None else c1
456 _c1 = c1.p1() if c1.rev() is None else c1
457 _c2 = c2.p1() if c2.rev() is None else c2
457 _c2 = c2.p1() if c2.rev() is None else c2
458 # an endpoint is "dirty" if it isn't a descendant of the merge base
458 # an endpoint is "dirty" if it isn't a descendant of the merge base
459 # if we have a dirty endpoint, we need to trigger graft logic, and also
459 # if we have a dirty endpoint, we need to trigger graft logic, and also
460 # keep track of which endpoint is dirty
460 # keep track of which endpoint is dirty
461 dirtyc1 = not base.isancestorof(_c1)
461 dirtyc1 = not base.isancestorof(_c1)
462 dirtyc2 = not base.isancestorof(_c2)
462 dirtyc2 = not base.isancestorof(_c2)
463 graft = dirtyc1 or dirtyc2
463 graft = dirtyc1 or dirtyc2
464 tca = base
464 tca = base
465 if graft:
465 if graft:
466 tca = _c1.ancestor(_c2)
466 tca = _c1.ancestor(_c2)
467
467
468 limit = _findlimit(repo, c1.rev(), c2.rev())
468 limit = _findlimit(repo, c1.rev(), c2.rev())
469 if limit is None:
469 if limit is None:
470 # no common ancestor, no copies
470 # no common ancestor, no copies
471 return {}, {}, {}, {}, {}
471 return {}, {}, {}, {}, {}
472 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
472 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
473
473
474 m1 = c1.manifest()
474 m1 = c1.manifest()
475 m2 = c2.manifest()
475 m2 = c2.manifest()
476 mb = base.manifest()
476 mb = base.manifest()
477
477
478 # gather data from _checkcopies:
478 # gather data from _checkcopies:
479 # - diverge = record all diverges in this dict
479 # - diverge = record all diverges in this dict
480 # - copy = record all non-divergent copies in this dict
480 # - copy = record all non-divergent copies in this dict
481 # - fullcopy = record all copies in this dict
481 # - fullcopy = record all copies in this dict
482 # - incomplete = record non-divergent partial copies here
482 # - incomplete = record non-divergent partial copies here
483 # - incompletediverge = record divergent partial copies here
483 # - incompletediverge = record divergent partial copies here
484 diverge = {} # divergence data is shared
484 diverge = {} # divergence data is shared
485 incompletediverge = {}
485 incompletediverge = {}
486 data1 = {'copy': {},
486 data1 = {'copy': {},
487 'fullcopy': {},
487 'fullcopy': {},
488 'incomplete': {},
488 'incomplete': {},
489 'diverge': diverge,
489 'diverge': diverge,
490 'incompletediverge': incompletediverge,
490 'incompletediverge': incompletediverge,
491 }
491 }
492 data2 = {'copy': {},
492 data2 = {'copy': {},
493 'fullcopy': {},
493 'fullcopy': {},
494 'incomplete': {},
494 'incomplete': {},
495 'diverge': diverge,
495 'diverge': diverge,
496 'incompletediverge': incompletediverge,
496 'incompletediverge': incompletediverge,
497 }
497 }
498
498
499 # find interesting file sets from manifests
499 # find interesting file sets from manifests
500 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
500 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
501 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
501 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
502 bothnew = sorted(addedinm1 & addedinm2)
502 bothnew = sorted(addedinm1 & addedinm2)
503 if tca == base:
503 if tca == base:
504 # unmatched file from base
504 # unmatched file from base
505 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
505 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
506 u1u, u2u = u1r, u2r
506 u1u, u2u = u1r, u2r
507 else:
507 else:
508 # unmatched file from base (DAG rotation in the graft case)
508 # unmatched file from base (DAG rotation in the graft case)
509 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
509 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
510 baselabel='base')
510 baselabel='base')
511 # unmatched file from topological common ancestors (no DAG rotation)
511 # unmatched file from topological common ancestors (no DAG rotation)
512 # need to recompute this for directory move handling when grafting
512 # need to recompute this for directory move handling when grafting
513 mta = tca.manifest()
513 mta = tca.manifest()
514 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
514 u1u, u2u = _computenonoverlap(repo, c1, c2,
515 m2.filesnotin(mta),
515 m1.filesnotin(mta, repo.narrowmatch()),
516 m2.filesnotin(mta, repo.narrowmatch()),
516 baselabel='topological common ancestor')
517 baselabel='topological common ancestor')
517
518
518 for f in u1u:
519 for f in u1u:
519 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
520 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
520
521
521 for f in u2u:
522 for f in u2u:
522 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
523 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
523
524
524 copy = dict(data1['copy'])
525 copy = dict(data1['copy'])
525 copy.update(data2['copy'])
526 copy.update(data2['copy'])
526 fullcopy = dict(data1['fullcopy'])
527 fullcopy = dict(data1['fullcopy'])
527 fullcopy.update(data2['fullcopy'])
528 fullcopy.update(data2['fullcopy'])
528
529
529 if dirtyc1:
530 if dirtyc1:
530 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
531 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
531 incompletediverge)
532 incompletediverge)
532 else:
533 else:
533 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
534 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
534 incompletediverge)
535 incompletediverge)
535
536
536 renamedelete = {}
537 renamedelete = {}
537 renamedeleteset = set()
538 renamedeleteset = set()
538 divergeset = set()
539 divergeset = set()
539 for of, fl in list(diverge.items()):
540 for of, fl in list(diverge.items()):
540 if len(fl) == 1 or of in c1 or of in c2:
541 if len(fl) == 1 or of in c1 or of in c2:
541 del diverge[of] # not actually divergent, or not a rename
542 del diverge[of] # not actually divergent, or not a rename
542 if of not in c1 and of not in c2:
543 if of not in c1 and of not in c2:
543 # renamed on one side, deleted on the other side, but filter
544 # renamed on one side, deleted on the other side, but filter
544 # out files that have been renamed and then deleted
545 # out files that have been renamed and then deleted
545 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
546 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
546 renamedeleteset.update(fl) # reverse map for below
547 renamedeleteset.update(fl) # reverse map for below
547 else:
548 else:
548 divergeset.update(fl) # reverse map for below
549 divergeset.update(fl) # reverse map for below
549
550
550 if bothnew:
551 if bothnew:
551 repo.ui.debug(" unmatched files new in both:\n %s\n"
552 repo.ui.debug(" unmatched files new in both:\n %s\n"
552 % "\n ".join(bothnew))
553 % "\n ".join(bothnew))
553 bothdiverge = {}
554 bothdiverge = {}
554 bothincompletediverge = {}
555 bothincompletediverge = {}
555 remainder = {}
556 remainder = {}
556 both1 = {'copy': {},
557 both1 = {'copy': {},
557 'fullcopy': {},
558 'fullcopy': {},
558 'incomplete': {},
559 'incomplete': {},
559 'diverge': bothdiverge,
560 'diverge': bothdiverge,
560 'incompletediverge': bothincompletediverge
561 'incompletediverge': bothincompletediverge
561 }
562 }
562 both2 = {'copy': {},
563 both2 = {'copy': {},
563 'fullcopy': {},
564 'fullcopy': {},
564 'incomplete': {},
565 'incomplete': {},
565 'diverge': bothdiverge,
566 'diverge': bothdiverge,
566 'incompletediverge': bothincompletediverge
567 'incompletediverge': bothincompletediverge
567 }
568 }
568 for f in bothnew:
569 for f in bothnew:
569 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
570 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
570 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
571 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
571 if dirtyc1:
572 if dirtyc1:
572 # incomplete copies may only be found on the "dirty" side for bothnew
573 # incomplete copies may only be found on the "dirty" side for bothnew
573 assert not both2['incomplete']
574 assert not both2['incomplete']
574 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
575 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
575 bothincompletediverge)
576 bothincompletediverge)
576 elif dirtyc2:
577 elif dirtyc2:
577 assert not both1['incomplete']
578 assert not both1['incomplete']
578 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
579 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
579 bothincompletediverge)
580 bothincompletediverge)
580 else:
581 else:
581 # incomplete copies and divergences can't happen outside grafts
582 # incomplete copies and divergences can't happen outside grafts
582 assert not both1['incomplete']
583 assert not both1['incomplete']
583 assert not both2['incomplete']
584 assert not both2['incomplete']
584 assert not bothincompletediverge
585 assert not bothincompletediverge
585 for f in remainder:
586 for f in remainder:
586 assert f not in bothdiverge
587 assert f not in bothdiverge
587 ic = remainder[f]
588 ic = remainder[f]
588 if ic[0] in (m1 if dirtyc1 else m2):
589 if ic[0] in (m1 if dirtyc1 else m2):
589 # backed-out rename on one side, but watch out for deleted files
590 # backed-out rename on one side, but watch out for deleted files
590 bothdiverge[f] = ic
591 bothdiverge[f] = ic
591 for of, fl in bothdiverge.items():
592 for of, fl in bothdiverge.items():
592 if len(fl) == 2 and fl[0] == fl[1]:
593 if len(fl) == 2 and fl[0] == fl[1]:
593 copy[fl[0]] = of # not actually divergent, just matching renames
594 copy[fl[0]] = of # not actually divergent, just matching renames
594
595
595 if fullcopy and repo.ui.debugflag:
596 if fullcopy and repo.ui.debugflag:
596 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
597 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
597 "% = renamed and deleted):\n")
598 "% = renamed and deleted):\n")
598 for f in sorted(fullcopy):
599 for f in sorted(fullcopy):
599 note = ""
600 note = ""
600 if f in copy:
601 if f in copy:
601 note += "*"
602 note += "*"
602 if f in divergeset:
603 if f in divergeset:
603 note += "!"
604 note += "!"
604 if f in renamedeleteset:
605 if f in renamedeleteset:
605 note += "%"
606 note += "%"
606 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
607 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
607 note))
608 note))
608 del divergeset
609 del divergeset
609
610
610 if not fullcopy:
611 if not fullcopy:
611 return copy, {}, diverge, renamedelete, {}
612 return copy, {}, diverge, renamedelete, {}
612
613
613 repo.ui.debug(" checking for directory renames\n")
614 repo.ui.debug(" checking for directory renames\n")
614
615
615 # generate a directory move map
616 # generate a directory move map
616 d1, d2 = c1.dirs(), c2.dirs()
617 d1, d2 = c1.dirs(), c2.dirs()
617 # Hack for adding '', which is not otherwise added, to d1 and d2
618 # Hack for adding '', which is not otherwise added, to d1 and d2
618 d1.addpath('/')
619 d1.addpath('/')
619 d2.addpath('/')
620 d2.addpath('/')
620 invalid = set()
621 invalid = set()
621 dirmove = {}
622 dirmove = {}
622
623
623 # examine each file copy for a potential directory move, which is
624 # examine each file copy for a potential directory move, which is
624 # when all the files in a directory are moved to a new directory
625 # when all the files in a directory are moved to a new directory
625 for dst, src in fullcopy.iteritems():
626 for dst, src in fullcopy.iteritems():
626 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
627 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
627 if dsrc in invalid:
628 if dsrc in invalid:
628 # already seen to be uninteresting
629 # already seen to be uninteresting
629 continue
630 continue
630 elif dsrc in d1 and ddst in d1:
631 elif dsrc in d1 and ddst in d1:
631 # directory wasn't entirely moved locally
632 # directory wasn't entirely moved locally
632 invalid.add(dsrc)
633 invalid.add(dsrc)
633 elif dsrc in d2 and ddst in d2:
634 elif dsrc in d2 and ddst in d2:
634 # directory wasn't entirely moved remotely
635 # directory wasn't entirely moved remotely
635 invalid.add(dsrc)
636 invalid.add(dsrc)
636 elif dsrc in dirmove and dirmove[dsrc] != ddst:
637 elif dsrc in dirmove and dirmove[dsrc] != ddst:
637 # files from the same directory moved to two different places
638 # files from the same directory moved to two different places
638 invalid.add(dsrc)
639 invalid.add(dsrc)
639 else:
640 else:
640 # looks good so far
641 # looks good so far
641 dirmove[dsrc] = ddst
642 dirmove[dsrc] = ddst
642
643
643 for i in invalid:
644 for i in invalid:
644 if i in dirmove:
645 if i in dirmove:
645 del dirmove[i]
646 del dirmove[i]
646 del d1, d2, invalid
647 del d1, d2, invalid
647
648
648 if not dirmove:
649 if not dirmove:
649 return copy, {}, diverge, renamedelete, {}
650 return copy, {}, diverge, renamedelete, {}
650
651
651 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
652 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
652
653
653 for d in dirmove:
654 for d in dirmove:
654 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
655 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
655 (d, dirmove[d]))
656 (d, dirmove[d]))
656
657
657 movewithdir = {}
658 movewithdir = {}
658 # check unaccounted nonoverlapping files against directory moves
659 # check unaccounted nonoverlapping files against directory moves
659 for f in u1r + u2r:
660 for f in u1r + u2r:
660 if f not in fullcopy:
661 if f not in fullcopy:
661 for d in dirmove:
662 for d in dirmove:
662 if f.startswith(d):
663 if f.startswith(d):
663 # new file added in a directory that was moved, move it
664 # new file added in a directory that was moved, move it
664 df = dirmove[d] + f[len(d):]
665 df = dirmove[d] + f[len(d):]
665 if df not in copy:
666 if df not in copy:
666 movewithdir[f] = df
667 movewithdir[f] = df
667 repo.ui.debug((" pending file src: '%s' -> "
668 repo.ui.debug((" pending file src: '%s' -> "
668 "dst: '%s'\n") % (f, df))
669 "dst: '%s'\n") % (f, df))
669 break
670 break
670
671
671 return copy, movewithdir, diverge, renamedelete, dirmove
672 return copy, movewithdir, diverge, renamedelete, dirmove
672
673
673 def _heuristicscopytracing(repo, c1, c2, base):
674 def _heuristicscopytracing(repo, c1, c2, base):
674 """ Fast copytracing using filename heuristics
675 """ Fast copytracing using filename heuristics
675
676
676 Assumes that moves or renames are of following two types:
677 Assumes that moves or renames are of following two types:
677
678
678 1) Inside a directory only (same directory name but different filenames)
679 1) Inside a directory only (same directory name but different filenames)
679 2) Move from one directory to another
680 2) Move from one directory to another
680 (same filenames but different directory names)
681 (same filenames but different directory names)
681
682
682 Works only when there are no merge commits in the "source branch".
683 Works only when there are no merge commits in the "source branch".
683 Source branch is commits from base up to c2 not including base.
684 Source branch is commits from base up to c2 not including base.
684
685
685 If merge is involved it fallbacks to _fullcopytracing().
686 If merge is involved it fallbacks to _fullcopytracing().
686
687
687 Can be used by setting the following config:
688 Can be used by setting the following config:
688
689
689 [experimental]
690 [experimental]
690 copytrace = heuristics
691 copytrace = heuristics
691
692
692 In some cases the copy/move candidates found by heuristics can be very large
693 In some cases the copy/move candidates found by heuristics can be very large
693 in number and that will make the algorithm slow. The number of possible
694 in number and that will make the algorithm slow. The number of possible
694 candidates to check can be limited by using the config
695 candidates to check can be limited by using the config
695 `experimental.copytrace.movecandidateslimit` which defaults to 100.
696 `experimental.copytrace.movecandidateslimit` which defaults to 100.
696 """
697 """
697
698
698 if c1.rev() is None:
699 if c1.rev() is None:
699 c1 = c1.p1()
700 c1 = c1.p1()
700 if c2.rev() is None:
701 if c2.rev() is None:
701 c2 = c2.p1()
702 c2 = c2.p1()
702
703
703 copies = {}
704 copies = {}
704
705
705 changedfiles = set()
706 changedfiles = set()
706 m1 = c1.manifest()
707 m1 = c1.manifest()
707 if not repo.revs('%d::%d', base.rev(), c2.rev()):
708 if not repo.revs('%d::%d', base.rev(), c2.rev()):
708 # If base is not in c2 branch, we switch to fullcopytracing
709 # If base is not in c2 branch, we switch to fullcopytracing
709 repo.ui.debug("switching to full copytracing as base is not "
710 repo.ui.debug("switching to full copytracing as base is not "
710 "an ancestor of c2\n")
711 "an ancestor of c2\n")
711 return _fullcopytracing(repo, c1, c2, base)
712 return _fullcopytracing(repo, c1, c2, base)
712
713
713 ctx = c2
714 ctx = c2
714 while ctx != base:
715 while ctx != base:
715 if len(ctx.parents()) == 2:
716 if len(ctx.parents()) == 2:
716 # To keep things simple let's not handle merges
717 # To keep things simple let's not handle merges
717 repo.ui.debug("switching to full copytracing because of merges\n")
718 repo.ui.debug("switching to full copytracing because of merges\n")
718 return _fullcopytracing(repo, c1, c2, base)
719 return _fullcopytracing(repo, c1, c2, base)
719 changedfiles.update(ctx.files())
720 changedfiles.update(ctx.files())
720 ctx = ctx.p1()
721 ctx = ctx.p1()
721
722
722 cp = _forwardcopies(base, c2)
723 cp = _forwardcopies(base, c2)
723 for dst, src in cp.iteritems():
724 for dst, src in cp.iteritems():
724 if src in m1:
725 if src in m1:
725 copies[dst] = src
726 copies[dst] = src
726
727
727 # file is missing if it isn't present in the destination, but is present in
728 # file is missing if it isn't present in the destination, but is present in
728 # the base and present in the source.
729 # the base and present in the source.
729 # Presence in the base is important to exclude added files, presence in the
730 # Presence in the base is important to exclude added files, presence in the
730 # source is important to exclude removed files.
731 # source is important to exclude removed files.
731 filt = lambda f: f not in m1 and f in base and f in c2
732 filt = lambda f: f not in m1 and f in base and f in c2
732 missingfiles = [f for f in changedfiles if filt(f)]
733 missingfiles = [f for f in changedfiles if filt(f)]
733
734
734 if missingfiles:
735 if missingfiles:
735 basenametofilename = collections.defaultdict(list)
736 basenametofilename = collections.defaultdict(list)
736 dirnametofilename = collections.defaultdict(list)
737 dirnametofilename = collections.defaultdict(list)
737
738
738 for f in m1.filesnotin(base.manifest()):
739 for f in m1.filesnotin(base.manifest()):
739 basename = os.path.basename(f)
740 basename = os.path.basename(f)
740 dirname = os.path.dirname(f)
741 dirname = os.path.dirname(f)
741 basenametofilename[basename].append(f)
742 basenametofilename[basename].append(f)
742 dirnametofilename[dirname].append(f)
743 dirnametofilename[dirname].append(f)
743
744
744 for f in missingfiles:
745 for f in missingfiles:
745 basename = os.path.basename(f)
746 basename = os.path.basename(f)
746 dirname = os.path.dirname(f)
747 dirname = os.path.dirname(f)
747 samebasename = basenametofilename[basename]
748 samebasename = basenametofilename[basename]
748 samedirname = dirnametofilename[dirname]
749 samedirname = dirnametofilename[dirname]
749 movecandidates = samebasename + samedirname
750 movecandidates = samebasename + samedirname
750 # f is guaranteed to be present in c2, that's why
751 # f is guaranteed to be present in c2, that's why
751 # c2.filectx(f) won't fail
752 # c2.filectx(f) won't fail
752 f2 = c2.filectx(f)
753 f2 = c2.filectx(f)
753 # we can have a lot of candidates which can slow down the heuristics
754 # we can have a lot of candidates which can slow down the heuristics
754 # config value to limit the number of candidates moves to check
755 # config value to limit the number of candidates moves to check
755 maxcandidates = repo.ui.configint('experimental',
756 maxcandidates = repo.ui.configint('experimental',
756 'copytrace.movecandidateslimit')
757 'copytrace.movecandidateslimit')
757
758
758 if len(movecandidates) > maxcandidates:
759 if len(movecandidates) > maxcandidates:
759 repo.ui.status(_("skipping copytracing for '%s', more "
760 repo.ui.status(_("skipping copytracing for '%s', more "
760 "candidates than the limit: %d\n")
761 "candidates than the limit: %d\n")
761 % (f, len(movecandidates)))
762 % (f, len(movecandidates)))
762 continue
763 continue
763
764
764 for candidate in movecandidates:
765 for candidate in movecandidates:
765 f1 = c1.filectx(candidate)
766 f1 = c1.filectx(candidate)
766 if _related(f1, f2):
767 if _related(f1, f2):
767 # if there are a few related copies then we'll merge
768 # if there are a few related copies then we'll merge
768 # changes into all of them. This matches the behaviour
769 # changes into all of them. This matches the behaviour
769 # of upstream copytracing
770 # of upstream copytracing
770 copies[candidate] = f
771 copies[candidate] = f
771
772
772 return copies, {}, {}, {}, {}
773 return copies, {}, {}, {}, {}
773
774
774 def _related(f1, f2):
775 def _related(f1, f2):
775 """return True if f1 and f2 filectx have a common ancestor
776 """return True if f1 and f2 filectx have a common ancestor
776
777
777 Walk back to common ancestor to see if the two files originate
778 Walk back to common ancestor to see if the two files originate
778 from the same file. Since workingfilectx's rev() is None it messes
779 from the same file. Since workingfilectx's rev() is None it messes
779 up the integer comparison logic, hence the pre-step check for
780 up the integer comparison logic, hence the pre-step check for
780 None (f1 and f2 can only be workingfilectx's initially).
781 None (f1 and f2 can only be workingfilectx's initially).
781 """
782 """
782
783
783 if f1 == f2:
784 if f1 == f2:
784 return f1 # a match
785 return f1 # a match
785
786
786 g1, g2 = f1.ancestors(), f2.ancestors()
787 g1, g2 = f1.ancestors(), f2.ancestors()
787 try:
788 try:
788 f1r, f2r = f1.linkrev(), f2.linkrev()
789 f1r, f2r = f1.linkrev(), f2.linkrev()
789
790
790 if f1r is None:
791 if f1r is None:
791 f1 = next(g1)
792 f1 = next(g1)
792 if f2r is None:
793 if f2r is None:
793 f2 = next(g2)
794 f2 = next(g2)
794
795
795 while True:
796 while True:
796 f1r, f2r = f1.linkrev(), f2.linkrev()
797 f1r, f2r = f1.linkrev(), f2.linkrev()
797 if f1r > f2r:
798 if f1r > f2r:
798 f1 = next(g1)
799 f1 = next(g1)
799 elif f2r > f1r:
800 elif f2r > f1r:
800 f2 = next(g2)
801 f2 = next(g2)
801 else: # f1 and f2 point to files in the same linkrev
802 else: # f1 and f2 point to files in the same linkrev
802 return f1 == f2 # true if they point to the same file
803 return f1 == f2 # true if they point to the same file
803 except StopIteration:
804 except StopIteration:
804 return False
805 return False
805
806
806 def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
807 def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
807 """
808 """
808 check possible copies of f from msrc to mdst
809 check possible copies of f from msrc to mdst
809
810
810 srcctx = starting context for f in msrc
811 srcctx = starting context for f in msrc
811 dstctx = destination context for f in mdst
812 dstctx = destination context for f in mdst
812 f = the filename to check (as in msrc)
813 f = the filename to check (as in msrc)
813 base = the changectx used as a merge base
814 base = the changectx used as a merge base
814 tca = topological common ancestor for graft-like scenarios
815 tca = topological common ancestor for graft-like scenarios
815 remotebase = True if base is outside tca::srcctx, False otherwise
816 remotebase = True if base is outside tca::srcctx, False otherwise
816 limit = the rev number to not search beyond
817 limit = the rev number to not search beyond
817 data = dictionary of dictionary to store copy data. (see mergecopies)
818 data = dictionary of dictionary to store copy data. (see mergecopies)
818
819
819 note: limit is only an optimization, and provides no guarantee that
820 note: limit is only an optimization, and provides no guarantee that
820 irrelevant revisions will not be visited
821 irrelevant revisions will not be visited
821 there is no easy way to make this algorithm stop in a guaranteed way
822 there is no easy way to make this algorithm stop in a guaranteed way
822 once it "goes behind a certain revision".
823 once it "goes behind a certain revision".
823 """
824 """
824
825
825 msrc = srcctx.manifest()
826 msrc = srcctx.manifest()
826 mdst = dstctx.manifest()
827 mdst = dstctx.manifest()
827 mb = base.manifest()
828 mb = base.manifest()
828 mta = tca.manifest()
829 mta = tca.manifest()
829 # Might be true if this call is about finding backward renames,
830 # Might be true if this call is about finding backward renames,
830 # This happens in the case of grafts because the DAG is then rotated.
831 # This happens in the case of grafts because the DAG is then rotated.
831 # If the file exists in both the base and the source, we are not looking
832 # If the file exists in both the base and the source, we are not looking
832 # for a rename on the source side, but on the part of the DAG that is
833 # for a rename on the source side, but on the part of the DAG that is
833 # traversed backwards.
834 # traversed backwards.
834 #
835 #
835 # In the case there is both backward and forward renames (before and after
836 # In the case there is both backward and forward renames (before and after
836 # the base) this is more complicated as we must detect a divergence.
837 # the base) this is more complicated as we must detect a divergence.
837 # We use 'backwards = False' in that case.
838 # We use 'backwards = False' in that case.
838 backwards = not remotebase and base != tca and f in mb
839 backwards = not remotebase and base != tca and f in mb
839 getsrcfctx = _makegetfctx(srcctx)
840 getsrcfctx = _makegetfctx(srcctx)
840 getdstfctx = _makegetfctx(dstctx)
841 getdstfctx = _makegetfctx(dstctx)
841
842
842 if msrc[f] == mb.get(f) and not remotebase:
843 if msrc[f] == mb.get(f) and not remotebase:
843 # Nothing to merge
844 # Nothing to merge
844 return
845 return
845
846
846 of = None
847 of = None
847 seen = {f}
848 seen = {f}
848 for oc in getsrcfctx(f, msrc[f]).ancestors():
849 for oc in getsrcfctx(f, msrc[f]).ancestors():
849 of = oc.path()
850 of = oc.path()
850 if of in seen:
851 if of in seen:
851 # check limit late - grab last rename before
852 # check limit late - grab last rename before
852 if oc.linkrev() < limit:
853 if oc.linkrev() < limit:
853 break
854 break
854 continue
855 continue
855 seen.add(of)
856 seen.add(of)
856
857
857 # remember for dir rename detection
858 # remember for dir rename detection
858 if backwards:
859 if backwards:
859 data['fullcopy'][of] = f # grafting backwards through renames
860 data['fullcopy'][of] = f # grafting backwards through renames
860 else:
861 else:
861 data['fullcopy'][f] = of
862 data['fullcopy'][f] = of
862 if of not in mdst:
863 if of not in mdst:
863 continue # no match, keep looking
864 continue # no match, keep looking
864 if mdst[of] == mb.get(of):
865 if mdst[of] == mb.get(of):
865 return # no merge needed, quit early
866 return # no merge needed, quit early
866 c2 = getdstfctx(of, mdst[of])
867 c2 = getdstfctx(of, mdst[of])
867 # c2 might be a plain new file on added on destination side that is
868 # c2 might be a plain new file on added on destination side that is
868 # unrelated to the droids we are looking for.
869 # unrelated to the droids we are looking for.
869 cr = _related(oc, c2)
870 cr = _related(oc, c2)
870 if cr and (of == f or of == c2.path()): # non-divergent
871 if cr and (of == f or of == c2.path()): # non-divergent
871 if backwards:
872 if backwards:
872 data['copy'][of] = f
873 data['copy'][of] = f
873 elif of in mb:
874 elif of in mb:
874 data['copy'][f] = of
875 data['copy'][f] = of
875 elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
876 elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
876 data['copy'][of] = f
877 data['copy'][of] = f
877 del data['fullcopy'][f]
878 del data['fullcopy'][f]
878 data['fullcopy'][of] = f
879 data['fullcopy'][of] = f
879 else: # divergence w.r.t. graft CA on one side of topological CA
880 else: # divergence w.r.t. graft CA on one side of topological CA
880 for sf in seen:
881 for sf in seen:
881 if sf in mb:
882 if sf in mb:
882 assert sf not in data['diverge']
883 assert sf not in data['diverge']
883 data['diverge'][sf] = [f, of]
884 data['diverge'][sf] = [f, of]
884 break
885 break
885 return
886 return
886
887
887 if of in mta:
888 if of in mta:
888 if backwards or remotebase:
889 if backwards or remotebase:
889 data['incomplete'][of] = f
890 data['incomplete'][of] = f
890 else:
891 else:
891 for sf in seen:
892 for sf in seen:
892 if sf in mb:
893 if sf in mb:
893 if tca == base:
894 if tca == base:
894 data['diverge'].setdefault(sf, []).append(f)
895 data['diverge'].setdefault(sf, []).append(f)
895 else:
896 else:
896 data['incompletediverge'][sf] = [of, f]
897 data['incompletediverge'][sf] = [of, f]
897 return
898 return
898
899
899 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
900 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
900 """reproduce copies from fromrev to rev in the dirstate
901 """reproduce copies from fromrev to rev in the dirstate
901
902
902 If skiprev is specified, it's a revision that should be used to
903 If skiprev is specified, it's a revision that should be used to
903 filter copy records. Any copies that occur between fromrev and
904 filter copy records. Any copies that occur between fromrev and
904 skiprev will not be duplicated, even if they appear in the set of
905 skiprev will not be duplicated, even if they appear in the set of
905 copies between fromrev and rev.
906 copies between fromrev and rev.
906 """
907 """
907 exclude = {}
908 exclude = {}
908 ctraceconfig = repo.ui.config('experimental', 'copytrace')
909 ctraceconfig = repo.ui.config('experimental', 'copytrace')
909 bctrace = stringutil.parsebool(ctraceconfig)
910 bctrace = stringutil.parsebool(ctraceconfig)
910 if (skiprev is not None and
911 if (skiprev is not None and
911 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
912 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
912 # copytrace='off' skips this line, but not the entire function because
913 # copytrace='off' skips this line, but not the entire function because
913 # the line below is O(size of the repo) during a rebase, while the rest
914 # the line below is O(size of the repo) during a rebase, while the rest
914 # of the function is much faster (and is required for carrying copy
915 # of the function is much faster (and is required for carrying copy
915 # metadata across the rebase anyway).
916 # metadata across the rebase anyway).
916 exclude = pathcopies(repo[fromrev], repo[skiprev])
917 exclude = pathcopies(repo[fromrev], repo[skiprev])
917 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
918 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
918 # copies.pathcopies returns backward renames, so dst might not
919 # copies.pathcopies returns backward renames, so dst might not
919 # actually be in the dirstate
920 # actually be in the dirstate
920 if dst in exclude:
921 if dst in exclude:
921 continue
922 continue
922 wctx[dst].markcopied(src)
923 wctx[dst].markcopied(src)
@@ -1,75 +1,74
1
1
2 $ . "$TESTDIR/narrow-library.sh"
2 $ . "$TESTDIR/narrow-library.sh"
3
3
4 create full repo
4 create full repo
5
5
6 $ hg init master
6 $ hg init master
7 $ cd master
7 $ cd master
8
8
9 $ mkdir inside
9 $ mkdir inside
10 $ echo inside > inside/f1
10 $ echo inside > inside/f1
11 $ mkdir outside
11 $ mkdir outside
12 $ echo outside > outside/f2
12 $ echo outside > outside/f2
13 $ hg ci -Aqm 'initial'
13 $ hg ci -Aqm 'initial'
14
14
15 $ hg mv outside/f2 inside/f2
15 $ hg mv outside/f2 inside/f2
16 $ hg ci -qm 'move f2 from outside'
16 $ hg ci -qm 'move f2 from outside'
17
17
18 $ echo modified > inside/f2
18 $ echo modified > inside/f2
19 $ hg ci -qm 'modify inside/f2'
19 $ hg ci -qm 'modify inside/f2'
20
20
21 $ mkdir outside
21 $ mkdir outside
22 $ echo new > outside/f3
22 $ echo new > outside/f3
23 $ hg ci -Aqm 'add outside/f3'
23 $ hg ci -Aqm 'add outside/f3'
24 $ cd ..
24 $ cd ..
25
25
26 $ hg clone --narrow ssh://user@dummy/master narrow --include inside -r 2
26 $ hg clone --narrow ssh://user@dummy/master narrow --include inside -r 2
27 adding changesets
27 adding changesets
28 adding manifests
28 adding manifests
29 adding file changes
29 adding file changes
30 added 3 changesets with 3 changes to 2 files
30 added 3 changesets with 3 changes to 2 files
31 new changesets *:* (glob)
31 new changesets *:* (glob)
32 updating to branch default
32 updating to branch default
33 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
33 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
34 $ cd narrow
34 $ cd narrow
35
35
36 $ hg co 'desc("move f2")'
36 $ hg co 'desc("move f2")'
37 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
37 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
38 $ hg status
38 $ hg status
39 $ hg diff
39 $ hg diff
40 $ hg diff --change . --git
40 $ hg diff --change . --git
41 diff --git a/inside/f2 b/inside/f2
41 diff --git a/inside/f2 b/inside/f2
42 new file mode 100644
42 new file mode 100644
43 --- /dev/null
43 --- /dev/null
44 +++ b/inside/f2
44 +++ b/inside/f2
45 @@ -0,0 +1,1 @@
45 @@ -0,0 +1,1 @@
46 +outside
46 +outside
47
47
48 $ hg log --follow inside/f2 -r tip
48 $ hg log --follow inside/f2 -r tip
49 changeset: 2:bcfb756e0ca9
49 changeset: 2:bcfb756e0ca9
50 tag: tip
50 tag: tip
51 user: test
51 user: test
52 date: Thu Jan 01 00:00:00 1970 +0000
52 date: Thu Jan 01 00:00:00 1970 +0000
53 summary: modify inside/f2
53 summary: modify inside/f2
54
54
55 changeset: 1:5a016133b2bb
55 changeset: 1:5a016133b2bb
56 user: test
56 user: test
57 date: Thu Jan 01 00:00:00 1970 +0000
57 date: Thu Jan 01 00:00:00 1970 +0000
58 summary: move f2 from outside
58 summary: move f2 from outside
59
59
60 $ echo new > inside/f4
60 $ echo new > inside/f4
61 $ hg ci -Aqm 'add inside/f4'
61 $ hg ci -Aqm 'add inside/f4'
62 $ hg pull -q
62 $ hg pull -q
63 $ hg --config extensions.rebase= rebase -d tip
63 $ hg --config extensions.rebase= rebase -d tip
64 rebasing 3:4f84b666728c "add inside/f4"
64 rebasing 3:4f84b666728c "add inside/f4"
65 saved backup bundle to $TESTTMP/narrow/.hg/strip-backup/4f84b666728c-4269b76e-rebase.hg
65 saved backup bundle to $TESTTMP/narrow/.hg/strip-backup/4f84b666728c-4269b76e-rebase.hg
66
66
67 $ hg co -q 0
67 $ hg co -q 0
68 $ echo modified > inside/f1
68 $ echo modified > inside/f1
69 $ hg ci -qm 'modify inside/f1'
69 $ hg ci -qm 'modify inside/f1'
70 $ echo new > inside/f5
70 $ echo new > inside/f5
71 $ hg ci -Aqm 'add inside/f5'
71 $ hg ci -Aqm 'add inside/f5'
72 $ hg --config extensions.rebase= rebase -d 'public()' -r .
72 $ hg --config extensions.rebase= rebase -d 'public()' -r .
73 rebasing 6:610b60178c28 "add inside/f5" (tip)
73 rebasing 6:610b60178c28 "add inside/f5" (tip)
74 abort: data/outside/f3.i@54e53435331b: no match found!
74 saved backup bundle to $TESTTMP/narrow/.hg/strip-backup/610b60178c28-65716a78-rebase.hg
75 [255]
General Comments 0
You need to be logged in to leave comments. Login now