copies: move comment about implementation of mergecopies() to end...
Martin von Zweigbergk
r42287:967c098e default
@@ -1,1011 +1,1012 b''
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import heapq
import os

from .i18n import _

from . import (
    match as matchmod,
    node,
    pathutil,
    util,
)
from .utils import (
    stringutil,
)

def _findlimit(repo, ctxa, ctxb):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendant
    of the other, in which case we can return the minimum revnum of a and b.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    wdirparents = None
    a = ctxa.rev()
    b = ctxb.rev()
    if a is None:
        wdirparents = (ctxa.p1(), ctxa.p2())
        a = node.wdirrev
    if b is None:
        assert not wdirparents
        wdirparents = (ctxb.p1(), ctxb.p2())
        b = node.wdirrev

    side = {a: -1, b: 1}
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    limit = node.wdirrev

    while interesting:
        r = -heapq.heappop(visit)
        if r == node.wdirrev:
            parents = [pctx.rev() for pctx in wdirparents]
        else:
            parents = cl.parentrevs(r)
            if parents[1] == node.nullrev:
                parents = parents[:1]
        for p in parents:
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @ 3 temporary amend commit for a1-amend
    # |
    # o 2 a1-amend
    # |
    # | o 1 a1
    # |/
    # o 0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendant of b or vice-versa.
    return min(limit, a, b)
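
# For example, in a linear history 0 <- 1 <- 2, _findlimit(repo, repo[2],
# repo[1]) marks rev 2 and rev 1 with different sides; visiting rev 2 clears
# the side of its parent rev 1 (reachable from both) and records limit = 2,
# so the function returns min(2, 2, 1) == 1, the direct-descendant case
# described in the docstring above.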

def _chain(src, dst, a, b):
    """chain two sets of copies a->b"""
    t = a.copy()
    for k, v in b.iteritems():
        if v in t:
            # found a chain
            if t[v] != k:
                # file wasn't renamed back to itself
                t[k] = t[v]
            if v not in dst:
                # chain was a rename, not a copy
                del t[v]
        if v in src:
            # file is a copy of an existing file
            t[k] = v

    for k, v in list(t.items()):
        # remove criss-crossed copies
        if k in src and v in dst:
            del t[k]
        # remove copies to files that were then removed
        elif k not in dst:
            del t[k]

    return t
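
# For example, chaining a rename a->b with a later rename b->c, where the
# 'src' context contains only 'a' and the 'dst' context contains only 'c':
#
#     _chain(src, dst, {'b': 'a'}, {'c': 'b'}) == {'c': 'a'}
#
# The intermediate entry for 'b' is dropped because 'b' no longer exists in
# dst (the second step was a rename, not a copy).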

def _tracefile(fctx, am, limit=node.nullrev):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am, stopping after the first ancestor lower than limit"""

    for f in fctx.ancestors():
        if am.get(f.path(), None) == f.filenode():
            return f
        if limit >= 0 and not f.isintroducedafter(limit):
            return None
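
# For example, if fctx is the filectx of 'b' in some descendant and 'b' was
# renamed from 'a' after the revision captured by the ancestor manifest am,
# _tracefile() walks fctx.ancestors() until it reaches the 'a' filectx whose
# filenode matches am.get('a'), and returns it; if the walk first drops below
# 'limit' without a match, it gives up and returns None.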

def _dirstatecopies(repo, match=None):
    ds = repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in 'anm' or (match and not match(k)):
            del c[k]
    return c
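
# The 'anm' check keeps copy information only for files whose dirstate state
# is added ('a'), normal ('n') or merged ('m'); entries for files in other
# states, such as removed ('r'), are dropped, as are paths rejected by the
# optional matcher.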

def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)
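
# For example, if a's manifest is {'a.txt'} and b's manifest is
# {'a.txt', 'b.txt'}, _computeforwardmissing(a, b) returns {'b.txt'}: only
# files that appear between a and b need copy tracing.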

def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    return (repo.ui.config('experimental', 'copies.read-from') ==
            'compatibility')
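
# Changeset-centric copy algorithms are enabled with:
#
#     [experimental]
#     copies.read-from = compatibility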

def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: looking into rename from %s to %s\n'
            % (a, b))
    limit = _findlimit(repo, a, b)
    if debug:
        dbg('debug.copies: search limit: %d\n' % limit)
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg('debug.copies: missing file to search: %d\n' % len(missing))

    for f in missing:
        if debug:
            dbg('debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            if debug:
                dbg('debug.copies: rename of: %s\n' % ofctx._path)
            cm[f] = ofctx.path()
        if debug:
            dbg('debug.copies: time: %f seconds\n'
                % (util.timer() - start))
    return cm

def _changesetforwardcopies(a, b, match):
    if a.rev() == node.nullrev:
        return {}

    repo = a.repo()
    children = {}
    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)

    roots = set(children) - set(missingrevs)
    # 'work' contains 3-tuples of a (revision number, parent number, copies).
    # The parent number is only used for knowing which parent the copies dict
    # came from.
    work = [(r, 1, {}) for r in roots]
    heapq.heapify(work)
    while work:
        r, i1, copies1 = heapq.heappop(work)
        if work and work[0][0] == r:
            # We are tracing copies from both parents
            r, i2, copies2 = heapq.heappop(work)
            copies = {}
            ctx = repo[r]
            p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
            allcopies = set(copies1) | set(copies2)
            # TODO: perhaps this filtering should be done as long as ctx
            # is a merge, whether or not we're tracing from both parents.
            for dst in allcopies:
                if not match(dst):
                    continue
                if dst not in copies2:
                    # Copied on p1 side: mark as copy from p1 side if it didn't
                    # already exist on p2 side
                    if dst not in p2man:
                        copies[dst] = copies1[dst]
                elif dst not in copies1:
                    # Copied on p2 side: mark as copy from p2 side if it didn't
                    # already exist on p1 side
                    if dst not in p1man:
                        copies[dst] = copies2[dst]
                else:
                    # Copied on both sides: mark as copy from p1 side
                    copies[dst] = copies1[dst]
        else:
            copies = copies1
        if r == b.rev():
            return copies
        for c in children[r]:
            childctx = repo[c]
            if r == childctx.p1().rev():
                parent = 1
                childcopies = childctx.p1copies()
            else:
                assert r == childctx.p2().rev()
                parent = 2
                childcopies = childctx.p2copies()
            if not match.always():
                childcopies = {dst: src for dst, src in childcopies.items()
                               if match(dst)}
            childcopies = _chain(a, childctx, copies, childcopies)
            heapq.heappush(work, (c, parent, childcopies))
    assert False

def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        if a == b.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(b._repo, match)

        cm = _committedforwardcopies(a, b.p1(), match)
        # combine copies from dirstate if necessary
        return _chain(a, b, cm, _dirstatecopies(b._repo, match))
    return _committedforwardcopies(a, b, match)
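
# For example, if 'hg cp a.txt b.txt' was committed between ancestor a and
# descendant b, _forwardcopies(a, b) returns {'b.txt': 'a.txt'}; for a dirty
# working copy, committed copies and dirstate copies are chained together
# above.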

def _backwardrenames(a, b, match):
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(f.iteritems()):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
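
# For example, if a renamed 'a.txt' to 'c.txt' relative to its ancestor b,
# _forwardcopies(b, a) yields {'c.txt': 'a.txt'} and _backwardrenames()
# reverses that to {'a.txt': 'c.txt'}; had it been a copy ('a.txt' still
# present in a), the 'v in a' check above would have skipped the entry.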

def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    if debug:
        repo.ui.debug('debug.copies: searching copies from %s to %s\n'
                      % (x, y))
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug('debug.copies: search mode: forward\n')
        return _forwardcopies(x, y, match=match)
    if a == y:
        if debug:
            repo.ui.debug('debug.copies: search mode: backward\n')
        return _backwardrenames(x, y, match=match)
    if debug:
        repo.ui.debug('debug.copies: search mode: combined\n')
    return _chain(x, y, _backwardrenames(x, a, match=match),
                  _forwardcopies(a, y, match=match))

def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
    """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
    and c2. This is its own function so extensions can easily wrap this call
    to see what files mergecopies is about to process.

    Even though c1 and c2 are not used in this function, they are useful in
    other extensions for being able to read the file nodes of the changed files.

    "baselabel" can be passed to help distinguish the multiple computations
    done in the graft case.
    """
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = " unmatched files in %s"
    if baselabel:
        header += ' (from %s)' % baselabel
    if u1:
        repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
    if u2:
        repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))

    return u1, u2
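
# For example, with addedinm1 = {'x', 'y'} and addedinm2 = {'y', 'z'},
# _computenonoverlap() returns (['x'], ['z']); 'y' was added on both sides
# and is handled separately as "bothnew" in _fullcopytracing() below.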

def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bugs in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        revs = [rev]
        if rev is None:
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if n in node.wdirfilenodeids: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not created from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    return util.lrucachefunc(makectx)

def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
    """combine partial copy paths"""
    remainder = {}
    for f in copyfrom:
        if f in copyto:
            finalcopy[copyto[f]] = copyfrom[f]
            del copyto[f]
    for f in incompletediverge:
        assert f not in diverge
        ic = incompletediverge[f]
        if ic[0] in copyto:
            diverge[f] = [copyto[ic[0]], ic[1]]
        else:
            remainder[f] = ic
    return remainder
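
# For example, if one side of a graft recorded the partial copy
# {'f': 'old'} (copyfrom) and the other side recorded {'f': 'new'} (copyto),
# _combinecopies() joins the halves into finalcopy['new'] = 'old', i.e. a
# complete 'old' -> 'new' copy across the rotated base.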

def mergecopies(repo, c1, c2, base):
    """
-    The function calling different copytracing algorithms on the basis of config
-    which find moves and copies between context c1 and c2 that are relevant for
+    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc. to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
+
+    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')
    boolctrace = stringutil.parsebool(copytracing)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    elif boolctrace is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on in such
        # cases
        return {}, {}, {}, {}, {}
    else:
        return _fullcopytracing(repo, c1, c2, base)
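
# A sketch of how callers consume the result (assuming a repo where one side
# ran 'hg mv a.txt b.txt' and the other side modified a.txt):
#
#     copy, movewithdir, diverge, renamedelete, dirmove = \
#         mergecopies(repo, c1, c2, base)
#     # copy == {'b.txt': 'a.txt'}: merge applies a.txt's changes to b.txt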

def _isfullcopytraceable(repo, c1, base):
    """ Checks if base, source and destination are all non-public branches;
    if yes, use the full copytrace algorithm for increased capabilities,
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit on
    the number of changesets from c1 to base; if the number of changesets is
    more than the limit, the full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint('experimental',
                                              'copytrace.sourcecommitlimit')
        commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False
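
# The threshold is configurable, e.g.:
#
#     [experimental]
#     copytrace.sourcecommitlimit = 50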

def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm, which finds all the new files that were
    added from merge base up to the top commit and, for each file, checks if
    it was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden. We still need to know a real common ancestor in this case. We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.isancestorof(wc) is False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not base.isancestorof(_c1)
    dirtyc2 = not base.isancestorof(_c2)
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1, c2)
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2,
                                      m1.filesnotin(mta, repo.narrowmatch()),
                                      m2.filesnotin(mta, repo.narrowmatch()),
                                      baselabel='topological common ancestor')

    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    if dirtyc2:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1 and dirtyc2:
        remainder = _combinecopies(both2['incomplete'], both1['incomplete'],
                                   copy, bothdiverge, bothincompletediverge)
        remainder1 = _combinecopies(both1['incomplete'], both2['incomplete'],
                                    copy, bothdiverge, bothincompletediverge)
        remainder.update(remainder1)
    elif dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
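
# For example, if every file under 'src/' was moved to 'lib/' on one side,
# dirmove ends up as {'src/': 'lib/'}, and a file 'src/new.c' introduced on
# the other side is scheduled to follow the directory:
# movewithdir['src/new.c'] == 'lib/new.c'.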
761
762
762 def _heuristicscopytracing(repo, c1, c2, base):
763 def _heuristicscopytracing(repo, c1, c2, base):
763 """ Fast copytracing using filename heuristics
764 """ Fast copytracing using filename heuristics
764
765
765 Assumes that moves or renames are of following two types:
766 Assumes that moves or renames are of following two types:
766
767
767 1) Inside a directory only (same directory name but different filenames)
768 1) Inside a directory only (same directory name but different filenames)
768 2) Move from one directory to another
769 2) Move from one directory to another
769 (same filenames but different directory names)
770 (same filenames but different directory names)
770
771
771 Works only when there are no merge commits in the "source branch".
772 Works only when there are no merge commits in the "source branch".
772 Source branch is commits from base up to c2 not including base.
773 Source branch is commits from base up to c2 not including base.
773
774
774 If merge is involved it fallbacks to _fullcopytracing().
775 If merge is involved it fallbacks to _fullcopytracing().
775
776
776 Can be used by setting the following config:
777 Can be used by setting the following config:
777
778
778 [experimental]
779 [experimental]
779 copytrace = heuristics
780 copytrace = heuristics
780
781
781 In some cases the copy/move candidates found by heuristics can be very large
782 In some cases the copy/move candidates found by heuristics can be very large
782 in number and that will make the algorithm slow. The number of possible
783 in number and that will make the algorithm slow. The number of possible
783 candidates to check can be limited by using the config
784 candidates to check can be limited by using the config
784 `experimental.copytrace.movecandidateslimit` which defaults to 100.
785 `experimental.copytrace.movecandidateslimit` which defaults to 100.
785 """
786 """
786
787
787 if c1.rev() is None:
788 if c1.rev() is None:
788 c1 = c1.p1()
789 c1 = c1.p1()
789 if c2.rev() is None:
790 if c2.rev() is None:
790 c2 = c2.p1()
791 c2 = c2.p1()
791
792
792 copies = {}
793 copies = {}
793
794
794 changedfiles = set()
795 changedfiles = set()
795 m1 = c1.manifest()
796 m1 = c1.manifest()
796 if not repo.revs('%d::%d', base.rev(), c2.rev()):
797 if not repo.revs('%d::%d', base.rev(), c2.rev()):
797 # If base is not in c2 branch, we switch to fullcopytracing
798 # If base is not in c2 branch, we switch to fullcopytracing
798 repo.ui.debug("switching to full copytracing as base is not "
799 repo.ui.debug("switching to full copytracing as base is not "
799 "an ancestor of c2\n")
800 "an ancestor of c2\n")
800 return _fullcopytracing(repo, c1, c2, base)
801 return _fullcopytracing(repo, c1, c2, base)
801
802
802 ctx = c2
803 ctx = c2
803 while ctx != base:
804 while ctx != base:
804 if len(ctx.parents()) == 2:
805 if len(ctx.parents()) == 2:
805 # To keep things simple let's not handle merges
806 # To keep things simple let's not handle merges
806 repo.ui.debug("switching to full copytracing because of merges\n")
807 repo.ui.debug("switching to full copytracing because of merges\n")
807 return _fullcopytracing(repo, c1, c2, base)
808 return _fullcopytracing(repo, c1, c2, base)
808 changedfiles.update(ctx.files())
809 changedfiles.update(ctx.files())
809 ctx = ctx.p1()
810 ctx = ctx.p1()
810
811
811 cp = _forwardcopies(base, c2)
812 cp = _forwardcopies(base, c2)
812 for dst, src in cp.iteritems():
813 for dst, src in cp.iteritems():
813 if src in m1:
814 if src in m1:
814 copies[dst] = src
815 copies[dst] = src

    # A file is missing if it isn't present in the destination, but is
    # present in the base and present in the source.
    # Presence in the base is important to exclude added files; presence in
    # the source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]
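    # Hypothetical example (filenames are illustrative only): if base
    # contains 'dir/a.py', c1 renamed it to 'dir/b.py' (so 'dir/a.py' is
    # not in m1), and c2 modified 'dir/a.py' in place, then 'dir/a.py' is
    # "missing" and becomes a candidate for move detection below.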

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)
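        # Sketch of the two indexes built above for files new in c1 relative
        # to base (hypothetical path): if c1 added 'newdir/a.py', then
        #   basenametofilename['a.py']  -> [..., 'newdir/a.py']
        #   dirnametofilename['newdir'] -> [..., 'newdir/a.py']
        # so a missing file is matched against files sharing its basename
        # (moved across directories) or its directory (renamed within the
        # same directory).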

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # there can be many candidates, which can make the heuristics
            # slow; a config value limits the number of move candidates
            # to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are multiple related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f
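    # The heuristics only fill in the 'copy' dict; the four empty dicts
    # below stand in for the other results that _fullcopytracing() would
    # compute (see mergecopies() for what each slot means).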
    return copies, {}, {}, {}, {}

def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to the common ancestor to see if the two files originate
    from the same file. Since a workingfilectx's rev() is None, it messes
    up the integer comparison logic, hence the pre-step check for None
    (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else: # f1 and f2 point to files in the same linkrev
                return f1 == f2 # true if they point to the same file
    except StopIteration:
        return False

def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionaries to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited; there is no easy way to make
    this algorithm stop in a guaranteed way once it "goes behind a certain
    revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # 'backwards' might be True if this call is about finding backward
    # renames; this happens in the case of grafts because the DAG is then
    # rotated. If the file exists in both the base and the source, we are
    # not looking for a rename on the source side, but on the part of the
    # DAG that is traversed backwards.
    #
    # In the case there are both backward and forward renames (before and
    # after the base) this is more complicated, as we must detect a
    # divergence. We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
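    # Hedged intuition for the graft case: base is then the grafted
    # commit's parent and typically a descendant of tca rather than an
    # ancestor of both sides, so the segment between tca and base is
    # traversed against the DAG's direction; renames found there are the
    # "backward" ones recorded via data['fullcopy'][of] = f below.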
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    seen = {f}
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        of = oc.path()
        if of in seen:
            # check limit late - grab the last rename before the limit
            if oc.linkrev() < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file added on the destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

    if of in mta:
        if backwards or remotebase:
            data['incomplete'][of] = f
        else:
            for sf in seen:
                if sf in mb:
                    if tca == base:
                        data['diverge'].setdefault(sf, []).append(f)
                    else:
                        data['incompletediverge'][sf] = [of, f]
                    return

def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config('experimental', 'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    if (skiprev is not None and
        (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
        # copytrace='off' skips this line, but not the entire function
        # because the line below is O(size of the repo) during a rebase,
        # while the rest of the function is much faster (and is required
        # for carrying copy metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst in exclude:
            continue
        wctx[dst].markcopied(src)