upstream/mercurial-mirror Commit - r41919:3158cb74

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import collections

10

import collections

11

import heapq

11

import heapq

12

import os

12

import os

13

14

from .i18n import _

14

from .i18n import _

15

16

from . import (

16

from . import (

17

match as matchmod,

17

match as matchmod,

18

node,

18

node,

19

pathutil,

19

pathutil,

20

scmutil,

20

scmutil,

21

util,

21

util,

22

)

22

)

23

from .utils import (

23

from .utils import (

24

stringutil,

24

stringutil,

25

)

25

)

26

27

def _findlimit(repo, ctxa, ctxb):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendant
    of the other, in which case we can return the minimum revnum of a and b.

    ctxa and ctxb are change contexts; a context whose rev() is None is
    treated as the working directory and is represented by node.wdirrev
    while walking. At most one of the two may be the working directory.
    Returns a revision number.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    wdirparents = None
    a = ctxa.rev()
    b = ctxb.rev()
    if a is None:
        wdirparents = (ctxa.p1(), ctxa.p2())
        a = node.wdirrev
    if b is None:
        assert not wdirparents
        wdirparents = (ctxb.p1(), ctxb.p2())
        b = node.wdirrev

    # side: rev -> -1 (only reachable from a), 1 (only from b), 0 (both)
    side = {a: -1, b: 1}
    # heapq is a min-heap; revs are stored negated so the highest rev
    # is popped first
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    limit = node.wdirrev

    while interesting:
        r = -heapq.heappop(visit)
        if r == node.wdirrev:
            # the working directory is not in the changelog; use the
            # parents recorded from its context instead
            parents = [pctx.rev() for pctx in wdirparents]
        else:
            parents = cl.parentrevs(r)
        if parents[1] == node.nullrev:
            parents = parents[:1]
        for p in parents:
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
        if side[r]:
            limit = r  # lowest rev visited
            interesting -= 1

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendant of b or vice versa.
    return min(limit, a, b)

110

111

def _chain(src, dst, a, b):
    """chain two sets of copies a->b

    a and b are {destination: source} dicts; a describes the first step
    and b the step that follows it. src and dst only need to support
    membership tests on file names. Neither a nor b is modified; a new
    dict is returned.
    """
    t = a.copy()
    # .items() rather than .iteritems(): the latter does not exist on
    # Python 3 dicts, and the second loop below already uses .items()
    for k, v in b.items():
        if v in t:
            # found a chain
            if t[v] != k:
                # file wasn't renamed back to itself
                t[k] = t[v]
            if v not in dst:
                # chain was a rename, not a copy
                del t[v]
        if v in src:
            # file is a copy of an existing file
            t[k] = v

    # remove criss-crossed copies
    # (iterate over a snapshot because entries are deleted on the way)
    for k, v in list(t.items()):
        if k in src and v in dst:
            del t[k]

    return t

133

134

def _tracefile(fctx, am, limit=node.nullrev):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am, stopping after the first ancestor lower than limit

    Returns None when no such ancestor is found. A negative limit (the
    default) disables the early stop.
    """

    for f in fctx.ancestors():
        if am.get(f.path(), None) == f.filenode():
            return f
        if limit >= 0 and not f.isintroducedafter(limit):
            return None
    # ancestors exhausted without a match
    return None

143

144

def _dirstatecopies(repo, match=None):
    """return the {destination: source} copies recorded in the dirstate

    Entries are dropped when the destination's dirstate state is not one
    of 'a', 'n' or 'm' (presumably added/normal/merged -- confirm against
    the dirstate documentation), or when a matcher is given and it
    rejects the destination. The dirstate's own copy map is not modified.
    """
    ds = repo.dirstate
    c = ds.copies().copy()
    # iterate over a snapshot of the keys since entries are deleted
    for k in list(c):
        if ds[k] not in 'anm' or (match and not match(k)):
            del c[k]
    return c

151

152

def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.

    a and b are contexts providing manifest(); match is handed through
    unchanged to the manifest's filesnotin().
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)

160

161

def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {destination in b: source in a} dict for the files that are
    present in b but missing from a and that can be traced back to a file
    in a's manifest.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: looking into rename from %s to %s\n'
            % (a, b))
    limit = _findlimit(repo, a, b)
    if debug:
        dbg('debug.copies: search limit: %d\n' % limit)
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # shared by every file context traced below so the ancestor set is
    # only set up once
    ancestrycontext = repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg('debug.copies: missing file to search: %d\n' % len(missing))

    for f in missing:
        if debug:
            dbg('debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            if debug:
                dbg('debug.copies: rename of: %s\n' % ofctx._path)
            cm[f] = ofctx.path()
        if debug:
            dbg('debug.copies: time: %f seconds\n'
                % (util.timer() - start))
    return cm

215

216

def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b

    b may be the working copy (b.rev() is None); in that case the copies
    recorded in the dirstate are used, chained onto the committed copies
    between a and b's first parent when a is not that parent.
    """

    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        if a == b.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(b._repo, match)

        cm = _committedforwardcopies(a, b.p1(), match)
        # combine copies from dirstate if necessary
        return _chain(a, b, cm, _dirstatecopies(b._repo, match))
    return _committedforwardcopies(a, b, match)

230

231

def _backwardrenames(a, b, match):
    """find {dst@b: src@a} rename mapping where b is an ancestor of a

    The forward copies from b to a are computed and reversed. Entries
    whose file still exists in a are copies rather than renames and are
    left out. match, when truthy, is applied to the file name in b.
    Returns an empty dict when experimental.copytrace is 'off'.
    """
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    # sorted so that the rename kept in a 1:n situation is deterministic;
    # .items() rather than .iteritems() so this also runs on Python 3
    for k, v in sorted(f.items()):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r

246

251

247

def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    Three cases are handled, depending on y.ancestor(x):
    - x is the ancestor: forward copies from x to y
    - y is the ancestor: backward renames from x to y
    - otherwise: backward renames from x to the ancestor, chained with
      forward copies from the ancestor to y

    Returns an empty dict when x == y or when either context is falsy.
    """
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    if debug:
        repo.ui.debug('debug.copies: searching copies from %s to %s\n'
                      % (x, y))
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug('debug.copies: search mode: forward\n')
        return _forwardcopies(x, y, match=match)
    if a == y:
        if debug:
            repo.ui.debug('debug.copies: search mode: backward\n')
        return _backwardrenames(x, y, match=match)
    if debug:
        repo.ui.debug('debug.copies: search mode: combined\n')
    return _chain(x, y, _backwardrenames(x, a, match=match),
                  _forwardcopies(a, y, match=match))

269

274

270

def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
    """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
    and c2. This is its own function so extensions can easily wrap this call
    to see what files mergecopies is about to process.

    Even though c1 and c2 are not used in this function, they are useful in
    other extensions for being able to read the file nodes of the changed files.

    "baselabel" can be passed to help distinguish the multiple computations
    done in the graft case.

    Returns a (u1, u2) pair of sorted lists: the names only in addedinm1
    and the names only in addedinm2. Non-empty lists are also written to
    the debug output.
    """
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = " unmatched files in %s"
    if baselabel:
        header += ' (from %s)' % baselabel
    if u1:
        repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
    if u2:
        repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))

    return u1, u2

293

298

294

def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.

    The returned callable takes (filename, filenode) and is wrapped in
    util.lrucachefunc.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        revs = [rev]
        if rev is None:
            # working context: start from its parents instead
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        # stored on ctx so a later call can reuse it
        ctx._ancestrycontext = ac

    def makectx(f, n):
        if n in node.wdirfilenodeids:  # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx

    return util.lrucachefunc(makectx)

328

333

329

def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
    """combine partial copy paths

    For every key present in both copyfrom and copyto, record
    finalcopy[copyto[key]] = copyfrom[key] and remove the key from copyto.
    Then, for every entry of incompletediverge (a two-item sequence), if
    its first item is a key of copyto the entry is completed into diverge,
    otherwise it is kept in the returned "remainder" dict.

    copyto, finalcopy and diverge are modified in place.
    """
    remainder = {}
    for f in copyfrom:
        if f in copyto:
            finalcopy[copyto[f]] = copyfrom[f]
            del copyto[f]
    for f in incompletediverge:
        assert f not in diverge
        ic = incompletediverge[f]
        if ic[0] in copyto:
            diverge[f] = [copyto[ic[0]], ic[1]]
        else:
            remainder[f] = ic
    return remainder

344

349

345

def mergecopies(repo, c1, c2, base):
    """
    The function calling different copytracing algorithms on the basis of config
    which find moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')
    boolctrace = stringutil.parsebool(copytracing)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    elif boolctrace is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}
    else:
        return _fullcopytracing(repo, c1, c2, base)

419

424

420

def _isfullcopytraceable(repo, c1, base):
    """ Checks whether base, source and destination are all non-public
    branches; if so, the full copytrace algorithm can be used for increased
    capabilities since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.

    Returns True only when both c1 and base are mutable and the number of
    revisions in 'base::c1' is strictly below the configured limit.
    """
    if c1.rev() is None:
        # working context: judge by its first parent
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint('experimental',
                                              'copytrace.sourcecommitlimit')
        commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False

437

442

438

def _fullcopytracing(repo, c1, c2, base):

443

def _fullcopytracing(repo, c1, c2, base):

439

""" The full copytracing algorithm which finds all the new files that were

444

""" The full copytracing algorithm which finds all the new files that were

440

added from merge base up to the top commit and for each file it checks if

445

added from merge base up to the top commit and for each file it checks if

441

this file was copied from another file.

446

this file was copied from another file.

442

447

443

This is pretty slow when a lot of changesets are involved but will track all

448

This is pretty slow when a lot of changesets are involved but will track all

444

the copies.

449

the copies.

445

"""

450

"""

446

# In certain scenarios (e.g. graft, update or rebase), base can be

451

# In certain scenarios (e.g. graft, update or rebase), base can be

447

# overridden We still need to know a real common ancestor in this case We

452

# overridden We still need to know a real common ancestor in this case We

448

# can't just compute _c1.ancestor(_c2) and compare it to ca, because there

453

# can't just compute _c1.ancestor(_c2) and compare it to ca, because there

449

# can be multiple common ancestors, e.g. in case of bidmerge. Because our

454

# can be multiple common ancestors, e.g. in case of bidmerge. Because our

450

# caller may not know if the revision passed in lieu of the CA is a genuine

455

# caller may not know if the revision passed in lieu of the CA is a genuine

451

# common ancestor or not without explicitly checking it, it's better to

456

# common ancestor or not without explicitly checking it, it's better to

452

# determine that here.

457

# determine that here.

453

#

458

#

454

# base.isancestorof(wc) is False, work around that

459

# base.isancestorof(wc) is False, work around that

455

_c1 = c1.p1() if c1.rev() is None else c1

460

_c1 = c1.p1() if c1.rev() is None else c1

456

_c2 = c2.p1() if c2.rev() is None else c2

461

_c2 = c2.p1() if c2.rev() is None else c2

457

# an endpoint is "dirty" if it isn't a descendant of the merge base

462

# an endpoint is "dirty" if it isn't a descendant of the merge base

458

# if we have a dirty endpoint, we need to trigger graft logic, and also

463

# if we have a dirty endpoint, we need to trigger graft logic, and also

459

# keep track of which endpoint is dirty

464

# keep track of which endpoint is dirty

460

dirtyc1 = not base.isancestorof(_c1)

465

dirtyc1 = not base.isancestorof(_c1)

461

dirtyc2 = not base.isancestorof(_c2)

466

dirtyc2 = not base.isancestorof(_c2)

462

graft = dirtyc1 or dirtyc2

467

graft = dirtyc1 or dirtyc2

463

tca = base

468

tca = base

464

if graft:

469

if graft:

465

tca = _c1.ancestor(_c2)

470

tca = _c1.ancestor(_c2)

466

471

467

limit = _findlimit(repo, c1, c2)

472

limit = _findlimit(repo, c1, c2)

468

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

473

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

469

474

470

m1 = c1.manifest()

475

m1 = c1.manifest()

471

m2 = c2.manifest()

476

m2 = c2.manifest()

472

mb = base.manifest()

477

mb = base.manifest()

473

478

474

# gather data from _checkcopies:

479

# gather data from _checkcopies:

475

# - diverge = record all diverges in this dict

480

# - diverge = record all diverges in this dict

476

# - copy = record all non-divergent copies in this dict

481

# - copy = record all non-divergent copies in this dict

477

# - fullcopy = record all copies in this dict

482

# - fullcopy = record all copies in this dict

478

# - incomplete = record non-divergent partial copies here

483

# - incomplete = record non-divergent partial copies here

479

# - incompletediverge = record divergent partial copies here

484

# - incompletediverge = record divergent partial copies here

480

diverge = {} # divergence data is shared

485

diverge = {} # divergence data is shared

481

incompletediverge = {}

486

incompletediverge = {}

482

data1 = {'copy': {},

487

data1 = {'copy': {},

483

'fullcopy': {},

488

'fullcopy': {},

484

'incomplete': {},

489

'incomplete': {},

485

'diverge': diverge,

490

'diverge': diverge,

486

'incompletediverge': incompletediverge,

491

'incompletediverge': incompletediverge,

487

}

492

}

488

data2 = {'copy': {},

493

data2 = {'copy': {},

489

'fullcopy': {},

494

'fullcopy': {},

490

'incomplete': {},

495

'incomplete': {},

491

'diverge': diverge,

496

'diverge': diverge,

492

'incompletediverge': incompletediverge,

497

'incompletediverge': incompletediverge,

493

}

498

}

494

499

495

# find interesting file sets from manifests

500

# find interesting file sets from manifests

496

addedinm1 = m1.filesnotin(mb, repo.narrowmatch())

501

addedinm1 = m1.filesnotin(mb, repo.narrowmatch())

497

addedinm2 = m2.filesnotin(mb, repo.narrowmatch())

502

addedinm2 = m2.filesnotin(mb, repo.narrowmatch())

498

bothnew = sorted(addedinm1 & addedinm2)

503

bothnew = sorted(addedinm1 & addedinm2)

499

if tca == base:

504

if tca == base:

500

# unmatched file from base

505

# unmatched file from base

501

u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)

506

u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)

502

u1u, u2u = u1r, u2r

507

u1u, u2u = u1r, u2r

503

else:

508

else:

504

# unmatched file from base (DAG rotation in the graft case)

509

# unmatched file from base (DAG rotation in the graft case)

505

u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,

510

u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,

506

baselabel='base')

511

baselabel='base')

507

# unmatched file from topological common ancestors (no DAG rotation)

512

# unmatched file from topological common ancestors (no DAG rotation)

508

# need to recompute this for directory move handling when grafting

513

# need to recompute this for directory move handling when grafting

509

mta = tca.manifest()

514

mta = tca.manifest()

510

u1u, u2u = _computenonoverlap(repo, c1, c2,

515

u1u, u2u = _computenonoverlap(repo, c1, c2,

511

m1.filesnotin(mta, repo.narrowmatch()),

516

m1.filesnotin(mta, repo.narrowmatch()),

512

m2.filesnotin(mta, repo.narrowmatch()),

517

m2.filesnotin(mta, repo.narrowmatch()),

513

baselabel='topological common ancestor')

518

baselabel='topological common ancestor')

514

519

515

for f in u1u:

520

for f in u1u:

516

_checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

521

_checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

517

522

518

for f in u2u:

523

for f in u2u:

519

_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

524

_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

520

525

521

copy = dict(data1['copy'])

526

copy = dict(data1['copy'])

522

copy.update(data2['copy'])

527

copy.update(data2['copy'])

523

fullcopy = dict(data1['fullcopy'])

528

fullcopy = dict(data1['fullcopy'])

524

fullcopy.update(data2['fullcopy'])

529

fullcopy.update(data2['fullcopy'])

525

530

526

if dirtyc1:

531

if dirtyc1:

527

_combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,

532

_combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,

528

incompletediverge)

533

incompletediverge)

529

else:

534

else:

530

_combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,

535

_combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,

531

incompletediverge)

536

incompletediverge)

532

537

533

renamedelete = {}

538

renamedelete = {}

534

renamedeleteset = set()

539

renamedeleteset = set()

535

divergeset = set()

540

divergeset = set()

536

for of, fl in list(diverge.items()):

541

for of, fl in list(diverge.items()):

537

if len(fl) == 1 or of in c1 or of in c2:

542

if len(fl) == 1 or of in c1 or of in c2:

538

del diverge[of] # not actually divergent, or not a rename

543

del diverge[of] # not actually divergent, or not a rename

539

if of not in c1 and of not in c2:

544

if of not in c1 and of not in c2:

540

# renamed on one side, deleted on the other side, but filter

545

# renamed on one side, deleted on the other side, but filter

541

# out files that have been renamed and then deleted

546

# out files that have been renamed and then deleted

542

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

547

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

543

renamedeleteset.update(fl) # reverse map for below

548

renamedeleteset.update(fl) # reverse map for below

544

else:

549

else:

545

divergeset.update(fl) # reverse map for below

550

divergeset.update(fl) # reverse map for below

546

551

547

if bothnew:

552

if bothnew:

548

repo.ui.debug(" unmatched files new in both:\n %s\n"

553

repo.ui.debug(" unmatched files new in both:\n %s\n"

549

% "\n ".join(bothnew))

554

% "\n ".join(bothnew))

550

bothdiverge = {}

555

bothdiverge = {}

551

bothincompletediverge = {}

556

bothincompletediverge = {}

552

remainder = {}

557

remainder = {}

553

both1 = {'copy': {},

558

both1 = {'copy': {},

554

'fullcopy': {},

559

'fullcopy': {},

555

'incomplete': {},

560

'incomplete': {},

556

'diverge': bothdiverge,

561

'diverge': bothdiverge,

557

'incompletediverge': bothincompletediverge

562

'incompletediverge': bothincompletediverge

558

}

563

}

559

both2 = {'copy': {},

564

both2 = {'copy': {},

560

'fullcopy': {},

565

'fullcopy': {},

561

'incomplete': {},

566

'incomplete': {},

562

'diverge': bothdiverge,

567

'diverge': bothdiverge,

563

'incompletediverge': bothincompletediverge

568

'incompletediverge': bothincompletediverge

564

}

569

}

565

for f in bothnew:

570

for f in bothnew:

566

_checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)

571

_checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)

567

_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)

572

_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)

568

if dirtyc1:

573

if dirtyc1:

569

# incomplete copies may only be found on the "dirty" side for bothnew

574

# incomplete copies may only be found on the "dirty" side for bothnew

570

assert not both2['incomplete']

575

assert not both2['incomplete']

571

remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,

576

remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,

572

bothincompletediverge)

577

bothincompletediverge)

573

elif dirtyc2:

578

elif dirtyc2:

574

assert not both1['incomplete']

579

assert not both1['incomplete']

575

remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,

580

remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,

576

bothincompletediverge)

581

bothincompletediverge)

577

else:

582

else:

578

# incomplete copies and divergences can't happen outside grafts

583

# incomplete copies and divergences can't happen outside grafts

579

assert not both1['incomplete']

584

assert not both1['incomplete']

580

assert not both2['incomplete']

585

assert not both2['incomplete']

581

assert not bothincompletediverge

586

assert not bothincompletediverge

582

for f in remainder:

587

for f in remainder:

583

assert f not in bothdiverge

588

assert f not in bothdiverge

584

ic = remainder[f]

589

ic = remainder[f]

585

if ic[0] in (m1 if dirtyc1 else m2):

590

if ic[0] in (m1 if dirtyc1 else m2):

586

# backed-out rename on one side, but watch out for deleted files

591

# backed-out rename on one side, but watch out for deleted files

587

bothdiverge[f] = ic

592

bothdiverge[f] = ic

588

for of, fl in bothdiverge.items():

593

for of, fl in bothdiverge.items():

589

if len(fl) == 2 and fl[0] == fl[1]:

594

if len(fl) == 2 and fl[0] == fl[1]:

590

copy[fl[0]] = of # not actually divergent, just matching renames

595

copy[fl[0]] = of # not actually divergent, just matching renames

591

596

592

if fullcopy and repo.ui.debugflag:

597

if fullcopy and repo.ui.debugflag:

593

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

598

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

594

"% = renamed and deleted):\n")

599

"% = renamed and deleted):\n")

595

for f in sorted(fullcopy):

600

for f in sorted(fullcopy):

596

note = ""

601

note = ""

597

if f in copy:

602

if f in copy:

598

note += "*"

603

note += "*"

599

if f in divergeset:

604

if f in divergeset:

600

note += "!"

605

note += "!"

601

if f in renamedeleteset:

606

if f in renamedeleteset:

602

note += "%"

607

note += "%"

603

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

608

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

604

note))

609

note))

605

del divergeset

610

del divergeset

606

611

607

if not fullcopy:

612

if not fullcopy:

608

return copy, {}, diverge, renamedelete, {}

613

return copy, {}, diverge, renamedelete, {}

609

614

610

repo.ui.debug(" checking for directory renames\n")

615

repo.ui.debug(" checking for directory renames\n")

611

616

612

# generate a directory move map

617

# generate a directory move map

613

d1, d2 = c1.dirs(), c2.dirs()

618

d1, d2 = c1.dirs(), c2.dirs()

614

# Hack for adding '', which is not otherwise added, to d1 and d2

619

# Hack for adding '', which is not otherwise added, to d1 and d2

615

d1.addpath('/')

620

d1.addpath('/')

616

d2.addpath('/')

621

d2.addpath('/')

617

invalid = set()

622

invalid = set()

618

dirmove = {}

623

dirmove = {}

619

624

620

# examine each file copy for a potential directory move, which is

625

# examine each file copy for a potential directory move, which is

621

# when all the files in a directory are moved to a new directory

626

# when all the files in a directory are moved to a new directory

622

for dst, src in fullcopy.iteritems():

627

for dst, src in fullcopy.iteritems():

623

dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)

628

dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)

624

if dsrc in invalid:

629

if dsrc in invalid:

625

# already seen to be uninteresting

630

# already seen to be uninteresting

626

continue

631

continue

627

elif dsrc in d1 and ddst in d1:

632

elif dsrc in d1 and ddst in d1:

628

# directory wasn't entirely moved locally

633

# directory wasn't entirely moved locally

629

invalid.add(dsrc)

634

invalid.add(dsrc)

630

elif dsrc in d2 and ddst in d2:

635

elif dsrc in d2 and ddst in d2:

631

# directory wasn't entirely moved remotely

636

# directory wasn't entirely moved remotely

632

invalid.add(dsrc)

637

invalid.add(dsrc)

633

elif dsrc in dirmove and dirmove[dsrc] != ddst:

638

elif dsrc in dirmove and dirmove[dsrc] != ddst:

634

# files from the same directory moved to two different places

639

# files from the same directory moved to two different places

635

invalid.add(dsrc)

640

invalid.add(dsrc)

636

else:

641

else:

637

# looks good so far

642

# looks good so far

638

dirmove[dsrc] = ddst

643

dirmove[dsrc] = ddst

639

644

640

for i in invalid:

645

for i in invalid:

641

if i in dirmove:

646

if i in dirmove:

642

del dirmove[i]

647

del dirmove[i]

643

del d1, d2, invalid

648

del d1, d2, invalid

644

649

645

if not dirmove:

650

if not dirmove:

646

return copy, {}, diverge, renamedelete, {}

651

return copy, {}, diverge, renamedelete, {}

647

652

648

dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

653

dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

649

654

650

for d in dirmove:

655

for d in dirmove:

651

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

656

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

652

(d, dirmove[d]))

657

(d, dirmove[d]))

653

658

654

movewithdir = {}

659

movewithdir = {}

655

# check unaccounted nonoverlapping files against directory moves

660

# check unaccounted nonoverlapping files against directory moves

656

for f in u1r + u2r:

661

for f in u1r + u2r:

657

if f not in fullcopy:

662

if f not in fullcopy:

658

for d in dirmove:

663

for d in dirmove:

659

if f.startswith(d):

664

if f.startswith(d):

660

# new file added in a directory that was moved, move it

665

# new file added in a directory that was moved, move it

661

df = dirmove[d] + f[len(d):]

666

df = dirmove[d] + f[len(d):]

662

if df not in copy:

667

if df not in copy:

663

movewithdir[f] = df

668

movewithdir[f] = df

664

repo.ui.debug((" pending file src: '%s' -> "

669

repo.ui.debug((" pending file src: '%s' -> "

665

"dst: '%s'\n") % (f, df))

670

"dst: '%s'\n") % (f, df))

666

break

671

break

667

672

668

return copy, movewithdir, diverge, renamedelete, dirmove

673

return copy, movewithdir, diverge, renamedelete, dirmove

669

674

670

def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
                    (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing(). The same
    fallback is used when ``base::c2`` is empty, i.e. base is not an ancestor
    of c2.

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns a 5-tuple whose first item maps destination paths to their copy
    source; on the heuristics path the remaining four items are always empty
    dicts.
    """

    # contexts without a revision number are replaced by their first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    # walk the first-parent chain from c2 back to base, collecting every file
    # touched on the way
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # copies recorded between base and c2 whose source still exists in c1
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        # index the files added in c1 (relative to base) by name and directory
        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (loop-invariant, so it is read once rather than per missing file)
        maxcandidates = repo.ui.configint('experimental',
                                          'copytrace.movecandidateslimit')

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}

770

775

771

def _related(f1, f2):

776

def _related(f1, f2):

772

"""return True if f1 and f2 filectx have a common ancestor

777

"""return True if f1 and f2 filectx have a common ancestor

773

778

774

Walk back to common ancestor to see if the two files originate

779

Walk back to common ancestor to see if the two files originate

775

from the same file. Since workingfilectx's rev() is None it messes

780

from the same file. Since workingfilectx's rev() is None it messes

776

up the integer comparison logic, hence the pre-step check for

781

up the integer comparison logic, hence the pre-step check for

777

None (f1 and f2 can only be workingfilectx's initially).

782

None (f1 and f2 can only be workingfilectx's initially).

778

"""

783

"""

779

784

780

if f1 == f2:

785

if f1 == f2:

781

return True # a match

786

return True # a match

782

787

783

g1, g2 = f1.ancestors(), f2.ancestors()

788

g1, g2 = f1.ancestors(), f2.ancestors()

784

try:

789

try:

785

f1r, f2r = f1.linkrev(), f2.linkrev()

790

f1r, f2r = f1.linkrev(), f2.linkrev()

786

791

787

if f1r is None:

792

if f1r is None:

788

f1 = next(g1)

793

f1 = next(g1)

789

if f2r is None:

794

if f2r is None:

790

f2 = next(g2)

795

f2 = next(g2)

791

796

792

while True:

797

while True:

793

f1r, f2r = f1.linkrev(), f2.linkrev()

798

f1r, f2r = f1.linkrev(), f2.linkrev()

794

if f1r > f2r:

799

if f1r > f2r:

795

f1 = next(g1)

800

f1 = next(g1)

796

elif f2r > f1r:

801

elif f2r > f1r:

797

f2 = next(g2)

802

f2 = next(g2)

798

else: # f1 and f2 point to files in the same linkrev

803

else: # f1 and f2 point to files in the same linkrev

799

return f1 == f2 # true if they point to the same file

804

return f1 == f2 # true if they point to the same file

800

except StopIteration:

805

except StopIteration:

801

return False

806

return False

802

807

803

def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    Look for copies of f going from the source side to the destination side

    srcctx = starting context for f on the source side
    dstctx = destination context
    f = the filename to check (as found in the source manifest)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionaries receiving the results, under the keys
           'copy', 'fullcopy', 'incomplete', 'diverge' and 'incompletediverge'

    Nothing is returned: every finding is written into data.

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    # aliases of the result containers; they are mutated in place
    copies = data['copy']
    fullcopies = data['fullcopy']
    divergences = data['diverge']

    of = None
    seen = {f}
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if oc.linkrev() < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            fullcopies[of] = f # grafting backwards through renames
        else:
            fullcopies[f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if not (cr and (of == f or of == c2.path())):
            continue
        # non-divergent
        if backwards:
            copies[of] = f
        elif of in mb:
            copies[f] = of
        elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
            copies[of] = f
            del fullcopies[f]
            fullcopies[of] = f
        else: # divergence w.r.t. graft CA on one side of topological CA
            for sf in seen:
                if sf in mb:
                    assert sf not in divergences
                    divergences[sf] = [f, of]
                    break
        return

    # the walk ended without a non-divergent match on the destination side
    if of not in mta:
        return
    if backwards or remotebase:
        data['incomplete'][of] = f
        return
    for sf in seen:
        if sf not in mb:
            continue
        if tca == base:
            divergences.setdefault(sf, []).append(f)
        else:
            data['incompletediverge'][sf] = [of, f]
        return

895

900

896

def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):

901

def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):

897

"""reproduce copies from fromrev to rev in the dirstate

902

"""reproduce copies from fromrev to rev in the dirstate

898

903

899

If skiprev is specified, it's a revision that should be used to

904

If skiprev is specified, it's a revision that should be used to

900

filter copy records. Any copies that occur between fromrev and

905

filter copy records. Any copies that occur between fromrev and

901

skiprev will not be duplicated, even if they appear in the set of

906

skiprev will not be duplicated, even if they appear in the set of

902

copies between fromrev and rev.

907

copies between fromrev and rev.

903

"""

908

"""

904

exclude = {}

909

exclude = {}

905

ctraceconfig = repo.ui.config('experimental', 'copytrace')

910

ctraceconfig = repo.ui.config('experimental', 'copytrace')

906

bctrace = stringutil.parsebool(ctraceconfig)

911

bctrace = stringutil.parsebool(ctraceconfig)

907

if (skiprev is not None and

912

if (skiprev is not None and

908

(ctraceconfig == 'heuristics' or bctrace or bctrace is None)):

913

(ctraceconfig == 'heuristics' or bctrace or bctrace is None)):

909

# copytrace='off' skips this line, but not the entire function because

914

# copytrace='off' skips this line, but not the entire function because

910

# the line below is O(size of the repo) during a rebase, while the rest

915

# the line below is O(size of the repo) during a rebase, while the rest

911

# of the function is much faster (and is required for carrying copy

916

# of the function is much faster (and is required for carrying copy

912

# metadata across the rebase anyway).

917

# metadata across the rebase anyway).

913

exclude = pathcopies(repo[fromrev], repo[skiprev])

918

exclude = pathcopies(repo[fromrev], repo[skiprev])

914

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

919

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

915

# copies.pathcopies returns backward renames, so dst might not

920

# copies.pathcopies returns backward renames, so dst might not

916

# actually be in the dirstate

921

# actually be in the dirstate

917

if dst in exclude:

922

if dst in exclude:

918

continue

923

continue

919

wctx[dst].markcopied(src)

924

wctx[dst].markcopied(src)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import collections
             import heapq
             import os
             from .i18n import _
             from . import (
                 match as matchmod,
                 node,
                 pathutil,
                 scmutil,
                 util,
             )
             from .utils import (
                 stringutil,
             )
             def _findlimit(repo, ctxa, ctxb):
                 """
                 Find the last revision that needs to be checked to ensure that a full
                 transitive closure for file copies can be properly calculated.
                 Generally, this means finding the earliest revision number that's an
                 ancestor of a or b but not both, except when a or b is a direct descendant
                 of the other, in which case we can return the minimum revnum of a and b.

                 'repo' supplies the changelog used to look up parent revisions;
                 'ctxa' and 'ctxb' are the two contexts being compared. A context
                 whose rev() is None is represented by node.wdirrev and its parents
                 are taken from its p1()/p2(). Returns a revision number.
                 """
                 # basic idea:
                 # - mark a and b with different sides
                 # - if a parent's children are all on the same side, the parent is
                 #   on that side, otherwise it is on no side
                 # - walk the graph in topological order with the help of a heap;
                 #   - add unseen parents to side map
                 #   - clear side of any parent that has children on different sides
                 #   - track number of interesting revs that might still be on a side
                 #   - track the lowest interesting rev seen
                 #   - quit when interesting revs is zero
                 cl = repo.changelog
                 wdirparents = None
                 a = ctxa.rev()
                 b = ctxb.rev()
                 # a rev() of None has no changelog entry: remember the parents of
                 # that context and stand in node.wdirrev for its revision number
                 if a is None:
                     wdirparents = (ctxa.p1(), ctxa.p2())
                     a = node.wdirrev
                 if b is None:
                     # at most one of the two contexts may have a rev() of None
                     assert not wdirparents
                     wdirparents = (ctxb.p1(), ctxb.p2())
                     b = node.wdirrev
                 # side: -1 for revisions reached only from a, 1 for those reached
                 # only from b, 0 once a revision has been reached from both
                 side = {a: -1, b: 1}
                 # revisions are stored negated so that the min-heap pops the
                 # highest revision number first
                 visit = [-a, -b]
                 heapq.heapify(visit)
                 interesting = len(visit)
                 limit = node.wdirrev
                 while interesting:
                     r = -heapq.heappop(visit)
                     if r == node.wdirrev:
                         parents = [pctx.rev() for pctx in wdirparents]
                     else:
                         parents = cl.parentrevs(r)
                     # drop a null second parent so it is never visited
                     if parents[1] == node.nullrev:
                         parents = parents[:1]
                     for p in parents:
                         if p not in side:
                             # first time we see p; add it to visit
                             side[p] = side[r]
                             if side[p]:
                                 interesting += 1
                             heapq.heappush(visit, -p)
                         elif side[p] and side[p] != side[r]:
                             # p was interesting but now we know better
                             side[p] = 0
                             interesting -= 1
                     if side[r]:
                         limit = r # lowest rev visited
                         interesting -= 1
                 # Consider the following flow (see test-commit-amend.t under issue4405):
                 # 1/ File 'a0' committed
                 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
                 # 3/ Move back to first commit
                 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
                 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
                 #
                 # During the amend in step five, we will be in this state:
                 #
                 # @  3 temporary amend commit for a1-amend
                 # |
                 # o  2 a1-amend
                 # |
                 # | o  1 a1
                 # |/
                 # o  0 a0
                 #
                 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
                 # yet the filelog has the copy information in rev 1 and we will not look
                 # back far enough unless we also look at the a and b as candidates.
                 # This only occurs when a is a descendant of b or vice versa.
                 return min(limit, a, b)
             def _chain(src, dst, a, b):
                 """chain two sets of copies a->b"""
                 t = a.copy()
                 for k, v in b.iteritems():
                     if v in t:
                         # found a chain
                         if t[v] != k:
                             # file wasn't renamed back to itself
                             t[k] = t[v]
                         if v not in dst:
                             # chain was a rename, not a copy
                             del t[v]
                     if v in src:
                         # file is a copy of an existing file
                         t[k] = v
                 # remove criss-crossed copies
                 for k, v in list(t.items()):
                     if k in src and v in dst:
                         del t[k]
                 return t
             def _tracefile(fctx, am, limit=node.nullrev):
                 """Walk the ancestors of fctx looking for one recorded in manifest am.

                 The first ancestor whose path maps, in 'am', to that ancestor's own
                 filenode is returned. When 'limit' is non-negative the walk gives up
                 (returning None) at the first ancestor that was not introduced after
                 'limit'. None is also returned when the ancestors are exhausted.
                 """
                 for ancestor in fctx.ancestors():
                     recorded = am.get(ancestor.path(), None)
                     if recorded == ancestor.filenode():
                         return ancestor
                     if limit >= 0 and not ancestor.isintroducedafter(limit):
                         # went past the limit without finding a match
                         break
                 return None
             def _dirstatecopies(repo, match=None):
                 ds = repo.dirstate
                 c = ds.copies().copy()
                 for k in list(c):
                     if ds[k] not in 'anm' or (match and not match(k)):
                         del c[k]
                 return c
             def _computeforwardmissing(a, b, match=None):
                 """Computes which files are in b but not a.
                 This is its own function so extensions can easily wrap this call to see what
                 files _forwardcopies is about to process.
                 """
                 ma = a.manifest()
                 mb = b.manifest()
                 return mb.filesnotin(ma, match=match)
             def _committedforwardcopies(a, b, match):
                 """Variant of _forwardcopies() for when b is a committed revision.

                 b.rev() must not be None (b cannot be the working copy). Returns a
                 dict mapping each file found only in b to the path of the ancestor
                 file context located for it in a's manifest.
                 """
                 repo = a._repo
                 ui = repo.ui
                 debug = ui.debugflag and ui.configbool('devel', 'debug.copies')
                 dbg = ui.debug
                 if debug:
                     dbg('debug.copies:    looking into rename from %s to %s\n'
                         % (a, b))

                 # a file may have to be traced back to the fctx parent of the last
                 # changeset found on one side only, but never further than that
                 limit = _findlimit(repo, a, b)
                 if debug:
                     dbg('debug.copies:      search limit: %d\n' % limit)
                 basemanifest = a.manifest()

                 # Only the origin of new files is looked up. Working out where old
                 # files went is too expensive, so a sequence such as
                 # 'hg rm b; hg cp a b' can be missed.
                 copies = {}

                 # Comparing two complete manifests is costly on large repositories.
                 # In the common case where a is the only parent of b (rebase,
                 # histedit, ...) the comparison is narrowed to the files b touched.
                 # Merge commits are left out because their ctx.files() is not
                 # correct for this comparison.
                 missingmatch = match
                 if b.p1() == a and b.p2().node() == node.nullid:
                     touched = scmutil.matchfiles(a._repo, b.files())
                     missingmatch = matchmod.intersectmatchers(match, touched)
                 missing = _computeforwardmissing(a, b, match=missingmatch)

                 ancestry = a._repo.changelog.ancestors([b.rev()], inclusive=True)

                 if debug:
                     dbg('debug.copies:      missing file to search: %d\n'
                         % len(missing))

                 for name in missing:
                     if debug:
                         dbg('debug.copies:        tracing file: %s\n' % name)
                     current = b[name]
                     current._ancestrycontext = ancestry

                     if debug:
                         start = util.timer()
                     origin = _tracefile(current, basemanifest, limit)
                     if origin:
                         if debug:
                             dbg('debug.copies:          rename of: %s\n'
                                 % origin._path)
                         copies[name] = origin.path()
                     if debug:
                         dbg('debug.copies:          time: %f seconds\n'
                             % (util.timer() - start))
                 return copies
             def _forwardcopies(a, b, match=None):
                 """Compute the {dst@b: src@a} copy mapping, a being an ancestor of b.

                 'match' is first combined with the repository's narrow matcher. When
                 b is the working copy (b.rev() is None) the copies recorded in the
                 dirstate are taken into account as well.
                 """
                 match = a.repo().narrowmatch(match)

                 if b.rev() is not None:
                     # b is a committed revision
                     return _committedforwardcopies(a, b, match)

                 parent = b.p1()
                 if a == parent:
                     # short-circuit to avoid issues with merge states
                     return _dirstatecopies(b._repo, match)

                 # copies up to the working copy parent, extended with whatever the
                 # dirstate recorded on top of it
                 committed = _committedforwardcopies(a, parent, match)
                 return _chain(a, b, committed, _dirstatecopies(b._repo, match))
-            def _backwardrenames(a, b):
+            def _backwardrenames(a, b, match):
                 # Inverts the forward copy mapping computed from b to a: each
                 # entry of the result maps a copy source to one of its
                 # destinations. Copy tracing switched 'off' yields an empty dict.
                 if a._repo.ui.config('experimental', 'copytrace') == 'off':
                     return {}
                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
+                # We don't want to pass in "match" here, since that would filter
+                # the destination by it. Since we're reversing the copies, we want
+                # to filter the source instead.
                 f = _forwardcopies(b, a)
                 r = {}
                 # iterating in sorted order makes the pick deterministic: for a
                 # given source the last destination in sort order is kept
                 for k, v in sorted(f.iteritems()):
+                    if match and not match(v):
+                        continue
                     # remove copies
                     if v in a:
                         continue
                     r[v] = k
                 return r
             def pathcopies(x, y, match=None):
                 """find {dst@y: src@x} copy mapping for directed compare"""
                 repo = x._repo
                 # debug output needs both the debug flag and devel.debug.copies
                 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
                 if debug:
                     repo.ui.debug('debug.copies: searching copies from %s to %s\n'
                                   % (x, y))
                 # equal contexts, or a context that evaluates as false: no copies
                 if x == y or not x or not y:
                     return {}
                 a = y.ancestor(x)
                 # the ancestor is x itself: walking forward from x to y is enough
                 if a == x:
                     if debug:
                         repo.ui.debug('debug.copies: search mode: forward\n')
                     return _forwardcopies(x, y, match=match)
                 # the ancestor is y itself: only backward renames are involved
                 if a == y:
                     if debug:
                         repo.ui.debug('debug.copies: search mode: backward\n')
-                    return _backwardrenames(x, y)
+                    return _backwardrenames(x, y, match=match)
                 if debug:
                     repo.ui.debug('debug.copies: search mode: combined\n')
                 # otherwise go backward from x to the ancestor, then forward from
                 # the ancestor to y, and chain the two mappings together
-                return _chain(x, y, _backwardrenames(x, a),
+                return _chain(x, y, _backwardrenames(x, a, match=match),
                               _forwardcopies(a, y, match=match))
             def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
                 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
                 and c2. This is its own function so extensions can easily wrap this call
                 to see what files mergecopies is about to process.
                 Even though c1 and c2 are not used in this function, they are useful in
                 other extensions for being able to read the file nodes of the changed files.
                 "baselabel" can be passed to help distinguish the multiple computations
                 done in the graft case.
                 """
                 u1 = sorted(addedinm1 - addedinm2)
                 u2 = sorted(addedinm2 - addedinm1)
                 header = "  unmatched files in %s"
                 if baselabel:
                     header += ' (from %s)' % baselabel
                 if u1:
                     repo.ui.debug("%s:\n   %s\n" % (header % 'local', "\n   ".join(u1)))
                 if u2:
                     repo.ui.debug("%s:\n   %s\n" % (header % 'other', "\n   ".join(u2)))
                 return u1, u2
             def _makegetfctx(ctx):
                 """Build the cached 'getfctx' function that _checkcopies expects.

                 A new function has to be built for every '_checkcopies' run so that
                 linkrev adjustment is set up correctly for that run. The adjustment
                 matters for avoiding bugs in rename detection, and sharing a proper
                 '_ancestrycontext' keeps its cost under control: without it every
                 file could trigger a full DAG traversal and the overall complexity
                 would explode (see issue4537).

                 This helper mostly exists to limit the impact on stable; feel free
                 to refactor it on default.
                 """
                 ctxrev = ctx.rev()
                 repo = ctx._repo

                 ancestry = getattr(ctx, '_ancestrycontext', None)
                 if ancestry is None:
                     if ctxrev is None:
                         # no revision number: start from the parents instead
                         startrevs = [p.rev() for p in ctx.parents()]
                     else:
                         startrevs = [ctxrev]
                     ancestry = repo.changelog.ancestors(startrevs, inclusive=True)
                     ctx._ancestrycontext = ancestry

                 def makectx(path, filenode):
                     if filenode not in node.wdirfilenodeids:
                         fctx = repo.filectx(path, fileid=filenode)
                         # setup only needed for filectx not created from a changectx
                         fctx._ancestrycontext = ancestry
                         fctx._descendantrev = ctxrev
                         return fctx
                     # in a working context?
                     if ctx.rev() is None:
                         return ctx.filectx(path)
                     return repo[None][path]

                 return util.lrucachefunc(makectx)
             def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
                 """combine partial copy paths"""
                 remainder = {}
                 for f in copyfrom:
                     if f in copyto:
                         finalcopy[copyto[f]] = copyfrom[f]
                         del copyto[f]
                 for f in incompletediverge:
                     assert f not in diverge
                     ic = incompletediverge[f]
                     if ic[0] in copyto:
                         diverge[f] = [copyto[ic[0]], ic[1]]
                     else:
                         remainder[f] = ic
                 return remainder
             def mergecopies(repo, c1, c2, base):
                 """
                 Select and run a copytracing algorithm according to the configuration.

                 The selected algorithm finds the moves and copies between contexts c1
                 and c2 that are relevant for merging, using 'base' as the merge base.

                 Copytracing is used by commands such as rebase, merge and unshelve to
                 merge files that were moved or copied in one merge parent and modified
                 in the other. For example:

                 o          ---> 4 another commit
                 |
                 |   o      ---> 3 commit that modifies a.txt
                 |  /
                 o /        ---> 2 commit that moves a.txt to b.txt
                 |/
                 o          ---> 1 merge base

                 When rebasing revision 3 onto revision 4, a.txt does not exist in
                 revision 4. With copytrace disabled the user is shown the following
                 message:

                 ```other changed <file> which local deleted```

                 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete"
                 and "dirmove".

                 "copy" maps destination name -> source name, where the source is in
                 c1 and the destination is in c2 or vice-versa.

                 "movewithdir" maps source name -> destination name, for a file that
                 is present at source in one context but not the other and that the
                 merge process has to move to destination because the other context
                 moved the directory it lives in.

                 "diverge" maps source name -> list of destination names for
                 divergent renames.

                 "renamedelete" maps source name -> list of destination names for
                 files deleted in c1 that were renamed in c2 or vice-versa.

                 "dirmove" maps detected source dir -> destination dir renames. It is
                 needed to handle changes to new files previously grafted into
                 renamed directories.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return {}, {}, {}, {}, {}

                 narrowmatch = c1.repo().narrowmatch()

                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

                 # Reading the copytrace setting deliberately comes after the
                 # node == p1 shortcut above: that shortcut is what keeps a simple
                 # copy alive across a rebase, even with copy tracing disabled.
                 copytracing = repo.ui.config('experimental', 'copytrace')
                 boolctrace = stringutil.parsebool(copytracing)

                 if copytracing != 'heuristics':
                     if boolctrace is False:
                         return {}, {}, {}, {}, {}
                     # stringutil.parsebool() returns None for a value it cannot
                     # parse; copytracing stays enabled in that case
                     return _fullcopytracing(repo, c1, c2, base)

                 # When only non-public revisions are involved, full copytracing is
                 # fast enough and also finds the copies the heuristics could miss.
                 if _isfullcopytraceable(repo, c1, base):
                     return _fullcopytracing(repo, c1, c2, base)
                 return _heuristicscopytracing(repo, c1, c2, base)
             def _isfullcopytraceable(repo, c1, base):
                 """ Checks that if base, source and destination are all no-public branches,
                 if yes let's use the full copytrace algorithm for increased capabilities
                 since it will be fast enough.
                 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
                 number of changesets from c1 to base such that if number of changesets are
                 more than the limit, full copytracing algorithm won't be used.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c1.mutable() and base.mutable():
                     sourcecommitlimit = repo.ui.configint('experimental',
                                                           'copytrace.sourcecommitlimit')
                     commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
                     return commits < sourcecommitlimit
                 return False
             def _fullcopytracing(repo, c1, c2, base):
                 """ The full copytracing algorithm which finds all the new files that were
                 added from merge base up to the top commit and for each file it checks if
                 this file was copied from another file.
                 This is pretty slow when a lot of changesets are involved but will track all
                 the copies.
                 """
                 # In certain scenarios (e.g. graft, update or rebase), base can be
                 # overridden We still need to know a real common ancestor in this case We
                 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
                 # can be multiple common ancestors, e.g. in case of bidmerge.  Because our
                 # caller may not know if the revision passed in lieu of the CA is a genuine
                 # common ancestor or not without explicitly checking it, it's better to
                 # determine that here.
                 #
                 # base.isancestorof(wc) is False, work around that
                 _c1 = c1.p1() if c1.rev() is None else c1
                 _c2 = c2.p1() if c2.rev() is None else c2
                 # an endpoint is "dirty" if it isn't a descendant of the merge base
                 # if we have a dirty endpoint, we need to trigger graft logic, and also
                 # keep track of which endpoint is dirty
                 dirtyc1 = not base.isancestorof(_c1)
                 dirtyc2 = not base.isancestorof(_c2)
                 graft = dirtyc1 or dirtyc2
                 tca = base
                 if graft:
                     tca = _c1.ancestor(_c2)
                 limit = _findlimit(repo, c1, c2)
                 repo.ui.debug("  searching for copies back to rev %d\n" % limit)
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 mb = base.manifest()
                 # gather data from _checkcopies:
                 # - diverge = record all diverges in this dict
                 # - copy = record all non-divergent copies in this dict
                 # - fullcopy = record all copies in this dict
                 # - incomplete = record non-divergent partial copies here
                 # - incompletediverge = record divergent partial copies here
                 diverge = {} # divergence data is shared
                 incompletediverge  = {}
                 data1 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': diverge,
                          'incompletediverge': incompletediverge,
                         }
                 data2 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': diverge,
                          'incompletediverge': incompletediverge,
                         }
                 # find interesting file sets from manifests
                 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
                 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
                 bothnew = sorted(addedinm1 & addedinm2)
                 if tca == base:
                     # unmatched file from base
                     u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
                     u1u, u2u = u1r, u2r
                 else:
                     # unmatched file from base (DAG rotation in the graft case)
                     u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                                   baselabel='base')
                     # unmatched file from topological common ancestors (no DAG rotation)
                     # need to recompute this for directory move handling when grafting
                     mta = tca.manifest()
                     u1u, u2u = _computenonoverlap(repo, c1, c2,
                                                   m1.filesnotin(mta, repo.narrowmatch()),
                                                   m2.filesnotin(mta, repo.narrowmatch()),
                                                   baselabel='topological common ancestor')
                 for f in u1u:
                     _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
                 for f in u2u:
                     _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
                 copy = dict(data1['copy'])
                 copy.update(data2['copy'])
                 fullcopy = dict(data1['fullcopy'])
                 fullcopy.update(data2['fullcopy'])
                 if dirtyc1:
                     _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                                    incompletediverge)
                 else:
                     _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                                    incompletediverge)
                 renamedelete = {}
                 renamedeleteset = set()
                 divergeset = set()
                 for of, fl in list(diverge.items()):
                     if len(fl) == 1 or of in c1 or of in c2:
                         del diverge[of] # not actually divergent, or not a rename
                         if of not in c1 and of not in c2:
                             # renamed on one side, deleted on the other side, but filter
                             # out files that have been renamed and then deleted
                             renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                             renamedeleteset.update(fl) # reverse map for below
                     else:
                         divergeset.update(fl) # reverse map for below
                 if bothnew:
                     repo.ui.debug("  unmatched files new in both:\n   %s\n"
                                   % "\n   ".join(bothnew))
                 bothdiverge = {}
                 bothincompletediverge = {}
                 remainder = {}
                 both1 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': bothdiverge,
                          'incompletediverge': bothincompletediverge
                         }
                 both2 = {'copy': {},
                          'fullcopy': {},
                          'incomplete': {},
                          'diverge': bothdiverge,
                          'incompletediverge': bothincompletediverge
                         }
                 for f in bothnew:
                     _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
                     _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
                 if dirtyc1:
                     # incomplete copies may only be found on the "dirty" side for bothnew
                     assert not both2['incomplete']
                     remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                                bothincompletediverge)
                 elif dirtyc2:
                     assert not both1['incomplete']
                     remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                                bothincompletediverge)
                 else:
                     # incomplete copies and divergences can't happen outside grafts
                     assert not both1['incomplete']
                     assert not both2['incomplete']
                     assert not bothincompletediverge
                 for f in remainder:
                     assert f not in bothdiverge
                     ic = remainder[f]
                     if ic[0] in (m1 if dirtyc1 else m2):
                         # backed-out rename on one side, but watch out for deleted files
                         bothdiverge[f] = ic
                 for of, fl in bothdiverge.items():
                     if len(fl) == 2 and fl[0] == fl[1]:
                         copy[fl[0]] = of # not actually divergent, just matching renames
                 if fullcopy and repo.ui.debugflag:
                     repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                                   "% = renamed and deleted):\n")
                     for f in sorted(fullcopy):
                         note = ""
                         if f in copy:
                             note += "*"
                         if f in divergeset:
                             note += "!"
                         if f in renamedeleteset:
                             note += "%"
                         repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                                           note))
                 del divergeset
                 if not fullcopy:
                     return copy, {}, diverge, renamedelete, {}
                 repo.ui.debug("  checking for directory renames\n")
                 # generate a directory move map
                 d1, d2 = c1.dirs(), c2.dirs()
                 # Hack for adding '', which is not otherwise added, to d1 and d2
                 d1.addpath('/')
                 d2.addpath('/')
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in fullcopy.iteritems():
                     dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif dsrc in d1 and ddst in d1:
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc)
                     elif dsrc in d2 and ddst in d2:
                         # directory wasn't entirely moved remotely
                         invalid.add(dsrc)
                     elif dsrc in dirmove and dirmove[dsrc] != ddst:
                         # files from the same directory moved to two different places
                         invalid.add(dsrc)
                     else:
                         # looks good so far
                         dirmove[dsrc] = ddst
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del d1, d2, invalid
                 if not dirmove:
                     return copy, {}, diverge, renamedelete, {}
                 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
                 for d in dirmove:
                     repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                                   (d, dirmove[d]))
                 movewithdir = {}
                 # check unaccounted nonoverlapping files against directory moves
                 for f in u1r + u2r:
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d):]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(("   pending file src: '%s' -> "
                                                    "dst: '%s'\n") % (f, df))
                                 break
                 return copy, movewithdir, diverge, renamedelete, dirmove
             def _heuristicscopytracing(repo, c1, c2, base):
                 """Fast copytracing using filename heuristics

                 Assumes that moves or renames are of the following two types:

                 1) Inside a directory only (same directory name but different filenames)
                 2) Move from one directory to another
                    (same filenames but different directory names)

                 Works only when there are no merge commits in the "source branch".
                 Source branch is commits from base up to c2 not including base.

                 If a merge is involved it falls back to _fullcopytracing(). The same
                 fallback is used when base is not an ancestor of c2.

                 Can be used by setting the following config:

                     [experimental]
                     copytrace = heuristics

                 In some cases the copy/move candidates found by heuristics can be very large
                 in number and that will make the algorithm slow. The number of possible
                 candidates to check can be limited by using the config
                 `experimental.copytrace.movecandidateslimit` which defaults to 100.

                 Returns a 5-tuple shaped like the result of _fullcopytracing(). On the
                 heuristics path only the first element, a {destination: source} dict
                 of copies, is populated; the other four are empty dicts.
                 """
                 # contexts without a revision number (presumably the working
                 # directory) are replaced by their first parent
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c2.rev() is None:
                     c2 = c2.p1()

                 copies = {}
                 changedfiles = set()
                 m1 = c1.manifest()
                 if not repo.revs('%d::%d', base.rev(), c2.rev()):
                     # If base is not in c2 branch, we switch to fullcopytracing
                     repo.ui.debug("switching to full copytracing as base is not "
                                   "an ancestor of c2\n")
                     return _fullcopytracing(repo, c1, c2, base)

                 # collect every file touched between base (exclusive) and c2
                 ctx = c2
                 while ctx != base:
                     if len(ctx.parents()) == 2:
                         # To keep things simple let's not handle merges
                         repo.ui.debug("switching to full copytracing because of merges\n")
                         return _fullcopytracing(repo, c1, c2, base)
                     changedfiles.update(ctx.files())
                     ctx = ctx.p1()

                 # copies recorded between base and c2 whose source still exists in c1
                 cp = _forwardcopies(base, c2)
                 for dst, src in cp.iteritems():
                     if src in m1:
                         copies[dst] = src

                 # file is missing if it isn't present in the destination, but is present in
                 # the base and present in the source.
                 # Presence in the base is important to exclude added files, presence in the
                 # source is important to exclude removed files.
                 missingfiles = [f for f in changedfiles
                                 if f not in m1 and f in base and f in c2]
                 if not missingfiles:
                     return copies, {}, {}, {}, {}

                 # index the files that exist in c1 but not in base by basename and by
                 # directory name; these are the only possible move destinations
                 basenametofilename = collections.defaultdict(list)
                 dirnametofilename = collections.defaultdict(list)
                 for f in m1.filesnotin(base.manifest()):
                     basenametofilename[os.path.basename(f)].append(f)
                     dirnametofilename[os.path.dirname(f)].append(f)

                 # we can have a lot of candidates which can slow down the heuristics
                 # config value to limit the number of candidates moves to check
                 # (loop-invariant, so it is read once instead of once per file)
                 maxcandidates = repo.ui.configint('experimental',
                                                   'copytrace.movecandidateslimit')

                 for f in missingfiles:
                     samebasename = basenametofilename[os.path.basename(f)]
                     samedirname = dirnametofilename[os.path.dirname(f)]
                     movecandidates = samebasename + samedirname

                     if len(movecandidates) > maxcandidates:
                         repo.ui.status(_("skipping copytracing for '%s', more "
                                          "candidates than the limit: %d\n")
                                        % (f, len(movecandidates)))
                         continue

                     # f is guaranteed to be present in c2, that's why
                     # c2.filectx(f) won't fail; it is only built once we know the
                     # file is not going to be skipped
                     f2 = c2.filectx(f)
                     for candidate in movecandidates:
                         f1 = c1.filectx(candidate)
                         if _related(f1, f2):
                             # if there are a few related copies then we'll merge
                             # changes into all of them. This matches the behaviour
                             # of upstream copytracing
                             copies[candidate] = f

                 return copies, {}, {}, {}, {}
             def _related(f1, f2):
                 """return True if f1 and f2 filectx have a common ancestor
                 Walk back to common ancestor to see if the two files originate
                 from the same file. Since workingfilectx's rev() is None it messes
                 up the integer comparison logic, hence the pre-step check for
                 None (f1 and f2 can only be workingfilectx's initially).
                 """
                 if f1 == f2:
                     return True # a match
                 g1, g2 = f1.ancestors(), f2.ancestors()
                 try:
                     f1r, f2r = f1.linkrev(), f2.linkrev()
                     if f1r is None:
                         f1 = next(g1)
                     if f2r is None:
                         f2 = next(g2)
                     while True:
                         f1r, f2r = f1.linkrev(), f2.linkrev()
                         if f1r > f2r:
                             f1 = next(g1)
                         elif f2r > f1r:
                             f2 = next(g2)
                         else: # f1 and f2 point to files in the same linkrev
                             return f1 == f2 # true if they point to the same file
                 except StopIteration:
                     return False
             def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
                 """
                 check possible copies of f from msrc to mdst

                 Walks the ancestors of f (as found in srcctx) and records what it
                 learns in the dictionaries held by ``data``. Nothing is returned; all
                 results are written into ``data``.

                 srcctx = starting context for f in msrc
                 dstctx = destination context for f in mdst
                 f = the filename to check (as in msrc)
                 base = the changectx used as a merge base
                 tca = topological common ancestor for graft-like scenarios
                 remotebase = True if base is outside tca::srcctx, False otherwise
                 limit = the rev number to not search beyond
                 data = dictionary of dictionary to store copy data. (see mergecopies)
                        The keys written here are 'copy', 'fullcopy', 'incomplete',
                        'diverge' and 'incompletediverge'.

                 note: limit is only an optimization, and provides no guarantee that
                 irrelevant revisions will not be visited
                 there is no easy way to make this algorithm stop in a guaranteed way
                 once it "goes behind a certain revision".
                 """
                 msrc = srcctx.manifest()
                 mdst = dstctx.manifest()
                 mb = base.manifest()
                 mta = tca.manifest()
                 # Might be true if this call is about finding backward renames,
                 # This happens in the case of grafts because the DAG is then rotated.
                 # If the file exists in both the base and the source, we are not looking
                 # for a rename on the source side, but on the part of the DAG that is
                 # traversed backwards.
                 #
                 # In the case there is both backward and forward renames (before and after
                 # the base) this is more complicated as we must detect a divergence.
                 # We use 'backwards = False' in that case.
                 backwards = not remotebase and base != tca and f in mb
                 # filectx factories bound to each side (helper defined elsewhere in
                 # this file; called below as get*fctx(path, filenode))
                 getsrcfctx = _makegetfctx(srcctx)
                 getdstfctx = _makegetfctx(dstctx)
                 # f has the same manifest entry in the source and in the base
                 if msrc[f] == mb.get(f) and not remotebase:
                     # Nothing to merge
                     return
                 # 'of' tracks the path of the ancestor currently examined; it is still
                 # None after the loop if f had no ancestors at all
                 of = None
                 # every path f has been known under so far, starting with f itself
                 seen = {f}
                 for oc in getsrcfctx(f, msrc[f]).ancestors():
                     of = oc.path()
                     if of in seen:
                         # check limit late - grab last rename before
                         if oc.linkrev() < limit:
                             break
                         continue
                     seen.add(of)
                     # remember for dir rename detection
                     if backwards:
                         data['fullcopy'][of] = f # grafting backwards through renames
                     else:
                         data['fullcopy'][f] = of
                     if of not in mdst:
                         continue # no match, keep looking
                     if mdst[of] == mb.get(of):
                         return # no merge needed, quit early
                     c2 = getdstfctx(of, mdst[of])
                     # c2 might be a plain new file added on destination side that is
                     # unrelated to the droids we are looking for.
                     cr = _related(oc, c2)
                     if cr and (of == f or of == c2.path()): # non-divergent
                         if backwards:
                             data['copy'][of] = f
                         elif of in mb:
                             data['copy'][f] = of
                         elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                             # flip the direction already recorded in 'fullcopy' above
                             data['copy'][of] = f
                             del data['fullcopy'][f]
                             data['fullcopy'][of] = f
                         else: # divergence w.r.t. graft CA on one side of topological CA
                             # file the divergence under the first seen name that
                             # exists in the base ('seen' is a set, so which one is
                             # "first" is not ordered)
                             for sf in seen:
                                 if sf in mb:
                                     assert sf not in data['diverge']
                                     data['diverge'][sf] = [f, of]
                                     break
                         return
                 # Reached when the walk ended (exhausted, or stopped at the limit)
                 # without returning from inside the loop; 'of' is the last ancestor
                 # path visited.
                 if of in mta:
                     if backwards or remotebase:
                         data['incomplete'][of] = f
                     else:
                         for sf in seen:
                             if sf in mb:
                                 if tca == base:
                                     data['diverge'].setdefault(sf, []).append(f)
                                 else:
                                     data['incompletediverge'][sf] = [of, f]
                                 # only the first seen name present in the base is used
                                 return
             def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
                 """reproduce copies from fromrev to rev in the dirstate

                 For every copy that pathcopies() reports between fromrev and rev, the
                 destination file in wctx is marked as copied from the source.

                 If skiprev is specified, it's a revision that should be used to
                 filter copy records. Any copies that occur between fromrev and
                 skiprev will not be duplicated, even if they appear in the set of
                 copies between fromrev and rev.

                 Destinations that do not exist in wctx are skipped rather than being
                 marked as copied.
                 """
                 exclude = {}
                 ctraceconfig = repo.ui.config('experimental', 'copytrace')
                 bctrace = stringutil.parsebool(ctraceconfig)
                 if (skiprev is not None and
                     (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
                     # copytrace='off' skips this line, but not the entire function because
                     # the line below is O(size of the repo) during a rebase, while the rest
                     # of the function is much faster (and is required for carrying copy
                     # metadata across the rebase anyway).
                     exclude = pathcopies(repo[fromrev], repo[skiprev])
                 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
                     if dst in exclude:
                         continue
                     # copies.pathcopies returns backward renames, so dst might not
                     # actually be in the dirstate; only record a copy for files that
                     # are really present in the working context
                     if dst in wctx:
                         wctx[dst].markcopied(src)

               $ cat >> $HGRCPATH << EOF
               > [alias]
               > l = log -G -T '{rev} {desc}\n{files}\n'
               > EOF
               $ REPONUM=0
               $ newrepo() {
               >     cd $TESTTMP
               >     REPONUM=`expr $REPONUM + 1`
               >     hg init repo-$REPONUM
               >     cd repo-$REPONUM
               > }
             Simple rename case
               $ newrepo
               $ echo x > x
               $ hg ci -Aqm 'add x'
               $ hg mv x y
               $ hg ci -m 'rename x to y'
               $ hg l
               @  1 rename x to y
               |  x y
               o  0 add x
                  x
               $ hg debugpathcopies 0 1
               x -> y
               $ hg debugpathcopies 1 0
               y -> x
             Test filtering copies by path. We do filtering by destination.
               $ hg debugpathcopies 0 1 x
               $ hg debugpathcopies 1 0 x
               y -> x
               $ hg debugpathcopies 0 1 y
               x -> y
-            BROKEN: the following command should not include the copy
               $ hg debugpathcopies 1 0 y
-              y -> x
             Copy a file onto another file
               $ newrepo
               $ echo x > x
               $ echo y > y
               $ hg ci -Aqm 'add x and y'
               $ hg cp -f x y
               $ hg ci -m 'copy x onto y'
               $ hg l
               @  1 copy x onto y
               |  y
               o  0 add x and y
                  x y
             Incorrectly doesn't show the copy
               $ hg debugpathcopies 0 1
             Copy a file onto another file with same content. If metadata is stored in changeset, this does not
             produce a new filelog entry. The changeset's "files" entry should still list the file.
               $ newrepo
               $ echo x > x
               $ echo x > x2
               $ hg ci -Aqm 'add x and x2 with same content'
               $ hg cp -f x x2
               $ hg ci -m 'copy x onto x2'
               $ hg l
               @  1 copy x onto x2
               |  x2
               o  0 add x and x2 with same content
                  x x2
             Incorrectly doesn't show the copy
               $ hg debugpathcopies 0 1
             Copy a file, then delete destination, then copy again. This does not create a new filelog entry.
               $ newrepo
               $ echo x > x
               $ hg ci -Aqm 'add x'
               $ hg cp x y
               $ hg ci -m 'copy x to y'
               $ hg rm y
               $ hg ci -m 'remove y'
               $ hg cp -f x y
               $ hg ci -m 'copy x onto y (again)'
               $ hg l
               @  3 copy x onto y (again)
               |  y
               o  2 remove y
               |  y
               o  1 copy x to y
               |  y
               o  0 add x
                  x
               $ hg debugpathcopies 0 3
               x -> y
             Rename file in a loop: x->y->z->x
               $ newrepo
               $ echo x > x
               $ hg ci -Aqm 'add x'
               $ hg mv x y
               $ hg ci -m 'rename x to y'
               $ hg mv y z
               $ hg ci -m 'rename y to z'
               $ hg mv z x
               $ hg ci -m 'rename z to x'
               $ hg l
               @  3 rename z to x
               |  x z
               o  2 rename y to z
               |  y z
               o  1 rename x to y
               |  x y
               o  0 add x
                  x
               $ hg debugpathcopies 0 3
             Copy x to y, then remove y, then add back y. With copy metadata in the changeset, this could easily
             end up reporting y as copied from x (if we don't unmark it as a copy when it's removed).
               $ newrepo
               $ echo x > x
               $ hg ci -Aqm 'add x'
               $ hg mv x y
               $ hg ci -m 'rename x to y'
               $ hg rm y
               $ hg ci -qm 'remove y'
               $ echo x > y
               $ hg ci -Aqm 'add back y'
               $ hg l
               @  3 add back y
               |  y
               o  2 remove y
               |  y
               o  1 rename x to y
               |  x y
               o  0 add x
                  x
               $ hg debugpathcopies 0 3
             Copy x to z, then remove z, then copy x2 (same content as x) to z. With copy metadata in the
             changeset, the two copies here will have the same filelog entry, so ctx['z'].introrev() might point
             to the first commit that added the file. We should still report the copy as being from x2.
               $ newrepo
               $ echo x > x
               $ echo x > x2
               $ hg ci -Aqm 'add x and x2 with same content'
               $ hg cp x z
               $ hg ci -qm 'copy x to z'
               $ hg rm z
               $ hg ci -m 'remove z'
               $ hg cp x2 z
               $ hg ci -m 'copy x2 to z'
               $ hg l
               @  3 copy x2 to z
               |  z
               o  2 remove z
               |  z
               o  1 copy x to z
               |  z
               o  0 add x and x2 with same content
                  x x2
               $ hg debugpathcopies 0 3
               x2 -> z
             Create x and y, then rename them both to the same name, but on different sides of a fork
               $ newrepo
               $ echo x > x
               $ echo y > y
               $ hg ci -Aqm 'add x and y'
               $ hg mv x z
               $ hg ci -qm 'rename x to z'
               $ hg co -q 0
               $ hg mv y z
               $ hg ci -qm 'rename y to z'
               $ hg l
               @  2 rename y to z
               |  y z
               | o  1 rename x to z
               |/   x z
               o  0 add x and y
                  x y
               $ hg debugpathcopies 1 2
               z -> x
               y -> z
             Fork renames x to y on one side and removes x on the other
               $ newrepo
               $ echo x > x
               $ hg ci -Aqm 'add x'
               $ hg mv x y
               $ hg ci -m 'rename x to y'
               $ hg co -q 0
               $ hg rm x
               $ hg ci -m 'remove x'
               created new head
               $ hg l
               @  2 remove x
               |  x
               | o  1 rename x to y
               |/   x y
               o  0 add x
                  x
             BROKEN: x doesn't exist here
               $ hg debugpathcopies 1 2
               y -> x
             Copies via null revision (there shouldn't be any)
               $ newrepo
               $ echo x > x
               $ hg ci -Aqm 'add x'
               $ hg cp x y
               $ hg ci -m 'copy x to y'
               $ hg co -q null
               $ echo x > x
               $ hg ci -Aqm 'add x (again)'
               $ hg l
               @  2 add x (again)
                  x
               o  1 copy x to y
               |  y
               o  0 add x
                  x
               $ hg debugpathcopies 1 2
               $ hg debugpathcopies 2 1
             Merge rename from other branch
               $ newrepo
               $ echo x > x
               $ hg ci -Aqm 'add x'
               $ hg mv x y
               $ hg ci -m 'rename x to y'
               $ hg co -q 0
               $ echo z > z
               $ hg ci -Aqm 'add z'
               $ hg merge -q 1
               $ hg ci -m 'merge rename from p2'
               $ hg l
               @    3 merge rename from p2
               |\   x
               | o  2 add z
               | |  z
               o |  1 rename x to y
               |/   x y
               o  0 add x
                  x
             Perhaps we should indicate the rename here, but `hg status` is documented to be weird during
             merges, so...
               $ hg debugpathcopies 0 3
               x -> y
               $ hg debugpathcopies 1 2
               y -> x
               $ hg debugpathcopies 1 3
               $ hg debugpathcopies 2 3
               x -> y
             Copy file from either side in a merge
               $ newrepo
               $ echo x > x
               $ hg ci -Aqm 'add x'
               $ hg co -q null
               $ echo y > y
               $ hg ci -Aqm 'add y'
               $ hg merge -q 0
               $ hg cp y z
               $ hg ci -m 'copy file from p1 in merge'
               $ hg co -q 1
               $ hg merge -q 0
               $ hg cp x z
               $ hg ci -qm 'copy file from p2 in merge'
               $ hg l
               @    3 copy file from p2 in merge
               |\   z
               +---o  2 copy file from p1 in merge
               | |/   z
               | o  1 add y
               |    y
               o  0 add x
                  x
               $ hg debugpathcopies 1 2
               y -> z
               $ hg debugpathcopies 0 2
               $ hg debugpathcopies 1 3
               $ hg debugpathcopies 0 3
               x -> z
             Copy file that exists on both sides of the merge, same content on both sides
               $ newrepo
               $ echo x > x
               $ hg ci -Aqm 'add x on branch 1'
               $ hg co -q null
               $ echo x > x
               $ hg ci -Aqm 'add x on branch 2'
               $ hg merge -q 0
               $ hg cp x z
               $ hg ci -qm 'merge'
               $ hg l
               @    2 merge
               |\   z
               | o  1 add x on branch 2
               |    x
               o  0 add x on branch 1
                  x
             It's a little weird that it shows up on both sides
               $ hg debugpathcopies 1 2
               x -> z
               $ hg debugpathcopies 0 2
               x -> z
             Copy file that exists on both sides of the merge, different content
               $ newrepo
               $ echo branch1 > x
               $ hg ci -Aqm 'add x on branch 1'
               $ hg co -q null
               $ echo branch2 > x
               $ hg ci -Aqm 'add x on branch 2'
               $ hg merge -q 0
               warning: conflicts while merging x! (edit, then use 'hg resolve --mark')
               [1]
               $ echo resolved > x
               $ hg resolve -m x
               (no more unresolved files)
               $ hg cp x z
               $ hg ci -qm 'merge'
               $ hg l
               @    2 merge
               |\   x z
               | o  1 add x on branch 2
               |    x
               o  0 add x on branch 1
                  x
               $ hg debugpathcopies 1 2
               $ hg debugpathcopies 0 2
               x -> z
             Copy x->y on one side of merge and copy x->z on the other side. Pathcopies from one parent
             of the merge to the merge should include the copy from the other side.
               $ newrepo
               $ echo x > x
               $ hg ci -Aqm 'add x'
               $ hg cp x y
               $ hg ci -qm 'copy x to y'
               $ hg co -q 0
               $ hg cp x z
               $ hg ci -qm 'copy x to z'
               $ hg merge -q 1
               $ hg ci -m 'merge copy x->y and copy x->z'
               $ hg l
               @    3 merge copy x->y and copy x->z
               |\
               | o  2 copy x to z
               | |  z
               o |  1 copy x to y
               |/   y
               o  0 add x
                  x
               $ hg debugpathcopies 2 3
               x -> y
               $ hg debugpathcopies 1 3
               x -> z
             Copy x to y on one side of merge, create y and rename to z on the other side. Pathcopies from the
             first side should not include the y->z rename since y didn't exist in the merge base.
               $ newrepo
               $ echo x > x
               $ hg ci -Aqm 'add x'
               $ hg cp x y
               $ hg ci -qm 'copy x to y'
               $ hg co -q 0
               $ echo y > y
               $ hg ci -Aqm 'add y'
               $ hg mv y z
               $ hg ci -m 'rename y to z'
               $ hg merge -q 1
               $ hg ci -m 'merge'
               $ hg l
               @    4 merge
               |\
               | o  3 rename y to z
               | |  y z
               | o  2 add y
               | |  y
               o |  1 copy x to y
               |/   y
               o  0 add x
                  x
               $ hg debugpathcopies 2 3
               y -> z
               $ hg debugpathcopies 1 3
             Create x and y, then rename x to z on one side of merge, and rename y to z and modify z on the
             other side.
               $ newrepo
               $ echo x > x
               $ echo y > y
               $ hg ci -Aqm 'add x and y'
               $ hg mv x z
               $ hg ci -qm 'rename x to z'
               $ hg co -q 0
               $ hg mv y z
               $ hg ci -qm 'rename y to z'
               $ echo z >> z
               $ hg ci -m 'modify z'
               $ hg merge -q 1
               warning: conflicts while merging z! (edit, then use 'hg resolve --mark')
               [1]
               $ echo z > z
               $ hg resolve -qm z
               $ hg ci -m 'merge 1 into 3'
             Try merging the other direction too
               $ hg co -q 1
               $ hg merge -q 3
               warning: conflicts while merging z! (edit, then use 'hg resolve --mark')
               [1]
               $ echo z > z
               $ hg resolve -qm z
               $ hg ci -m 'merge 3 into 1'
               created new head
               $ hg l
               @    5 merge 3 into 1
               |\   y z
               +---o  4 merge 1 into 3
               | |/   x z
               | o  3 modify z
               | |  z
               | o  2 rename y to z
               | |  y z
               o |  1 rename x to z
               |/   x z
               o  0 add x and y
                  x y
               $ hg debugpathcopies 1 4
               $ hg debugpathcopies 2 4
               $ hg debugpathcopies 0 4
               x -> z
               $ hg debugpathcopies 1 5
               $ hg debugpathcopies 2 5
               $ hg debugpathcopies 0 5
               x -> z