##// END OF EJS Templates
copies: compute the exact set of revision to walk...
marmoute -
r43593:83bb1e89 default
parent child Browse files
Show More
@@ -1,974 +1,982 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
12 import os
11 import os
13
12
14 from .i18n import _
13 from .i18n import _
15
14
16
15
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
16 from .revlogutils.flagutil import REVIDX_SIDEDATA
18
17
19 from . import (
18 from . import (
20 error,
19 error,
21 match as matchmod,
20 match as matchmod,
22 node,
21 node,
23 pathutil,
22 pathutil,
24 pycompat,
23 pycompat,
25 util,
24 util,
26 )
25 )
27
26
28 from .revlogutils import sidedata as sidedatamod
27 from .revlogutils import sidedata as sidedatamod
29
28
30 from .utils import stringutil
29 from .utils import stringutil
31
30
32
31
33 def _filter(src, dst, t):
32 def _filter(src, dst, t):
34 """filters out invalid copies after chaining"""
33 """filters out invalid copies after chaining"""
35
34
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
35 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
36 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 # in the following table (not including trivial cases). For example, case 2
37 # in the following table (not including trivial cases). For example, case 2
39 # is where a file existed in 'src' and remained under that name in 'mid' and
38 # is where a file existed in 'src' and remained under that name in 'mid' and
40 # then was renamed between 'mid' and 'dst'.
39 # then was renamed between 'mid' and 'dst'.
41 #
40 #
42 # case src mid dst result
41 # case src mid dst result
43 # 1 x y - -
42 # 1 x y - -
44 # 2 x y y x->y
43 # 2 x y y x->y
45 # 3 x y x -
44 # 3 x y x -
46 # 4 x y z x->z
45 # 4 x y z x->z
47 # 5 - x y -
46 # 5 - x y -
48 # 6 x x y x->y
47 # 6 x x y x->y
49 #
48 #
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
49 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
50 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 # between 5 and 6, so it includes all cases in its result.
51 # between 5 and 6, so it includes all cases in its result.
53 # Cases 1, 3, and 5 are then removed by _filter().
52 # Cases 1, 3, and 5 are then removed by _filter().
54
53
55 for k, v in list(t.items()):
54 for k, v in list(t.items()):
56 # remove copies from files that didn't exist
55 # remove copies from files that didn't exist
57 if v not in src:
56 if v not in src:
58 del t[k]
57 del t[k]
59 # remove criss-crossed copies
58 # remove criss-crossed copies
60 elif k in src and v in dst:
59 elif k in src and v in dst:
61 del t[k]
60 del t[k]
62 # remove copies to files that were then removed
61 # remove copies to files that were then removed
63 elif k not in dst:
62 elif k not in dst:
64 del t[k]
63 del t[k]
65
64
66
65
67 def _chain(a, b):
66 def _chain(a, b):
68 """chain two sets of copies 'a' and 'b'"""
67 """chain two sets of copies 'a' and 'b'"""
69 t = a.copy()
68 t = a.copy()
70 for k, v in pycompat.iteritems(b):
69 for k, v in pycompat.iteritems(b):
71 if v in t:
70 if v in t:
72 t[k] = t[v]
71 t[k] = t[v]
73 else:
72 else:
74 t[k] = v
73 t[k] = v
75 return t
74 return t
76
75
77
76
78 def _tracefile(fctx, am, basemf):
77 def _tracefile(fctx, am, basemf):
79 """return file context that is the ancestor of fctx present in ancestor
78 """return file context that is the ancestor of fctx present in ancestor
80 manifest am
79 manifest am
81
80
82 Note: we used to try and stop after a given limit, however checking if that
81 Note: we used to try and stop after a given limit, however checking if that
83 limit is reached turned out to be very expensive. we are better off
82 limit is reached turned out to be very expensive. we are better off
84 disabling that feature."""
83 disabling that feature."""
85
84
86 for f in fctx.ancestors():
85 for f in fctx.ancestors():
87 path = f.path()
86 path = f.path()
88 if am.get(path, None) == f.filenode():
87 if am.get(path, None) == f.filenode():
89 return path
88 return path
90 if basemf and basemf.get(path, None) == f.filenode():
89 if basemf and basemf.get(path, None) == f.filenode():
91 return path
90 return path
92
91
93
92
94 def _dirstatecopies(repo, match=None):
93 def _dirstatecopies(repo, match=None):
95 ds = repo.dirstate
94 ds = repo.dirstate
96 c = ds.copies().copy()
95 c = ds.copies().copy()
97 for k in list(c):
96 for k in list(c):
98 if ds[k] not in b'anm' or (match and not match(k)):
97 if ds[k] not in b'anm' or (match and not match(k)):
99 del c[k]
98 del c[k]
100 return c
99 return c
101
100
102
101
103 def _computeforwardmissing(a, b, match=None):
102 def _computeforwardmissing(a, b, match=None):
104 """Computes which files are in b but not a.
103 """Computes which files are in b but not a.
105 This is its own function so extensions can easily wrap this call to see what
104 This is its own function so extensions can easily wrap this call to see what
106 files _forwardcopies is about to process.
105 files _forwardcopies is about to process.
107 """
106 """
108 ma = a.manifest()
107 ma = a.manifest()
109 mb = b.manifest()
108 mb = b.manifest()
110 return mb.filesnotin(ma, match=match)
109 return mb.filesnotin(ma, match=match)
111
110
112
111
113 def usechangesetcentricalgo(repo):
112 def usechangesetcentricalgo(repo):
114 """Checks if we should use changeset-centric copy algorithms"""
113 """Checks if we should use changeset-centric copy algorithms"""
115 if repo.filecopiesmode == b'changeset-sidedata':
114 if repo.filecopiesmode == b'changeset-sidedata':
116 return True
115 return True
117 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
116 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
118 changesetsource = (b'changeset-only', b'compatibility')
117 changesetsource = (b'changeset-only', b'compatibility')
119 return readfrom in changesetsource
118 return readfrom in changesetsource
120
119
121
120
122 def _committedforwardcopies(a, b, base, match):
121 def _committedforwardcopies(a, b, base, match):
123 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
122 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
124 # files might have to be traced back to the fctx parent of the last
123 # files might have to be traced back to the fctx parent of the last
125 # one-side-only changeset, but not further back than that
124 # one-side-only changeset, but not further back than that
126 repo = a._repo
125 repo = a._repo
127
126
128 if usechangesetcentricalgo(repo):
127 if usechangesetcentricalgo(repo):
129 return _changesetforwardcopies(a, b, match)
128 return _changesetforwardcopies(a, b, match)
130
129
131 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
130 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
132 dbg = repo.ui.debug
131 dbg = repo.ui.debug
133 if debug:
132 if debug:
134 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
133 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
135 am = a.manifest()
134 am = a.manifest()
136 basemf = None if base is None else base.manifest()
135 basemf = None if base is None else base.manifest()
137
136
138 # find where new files came from
137 # find where new files came from
139 # we currently don't try to find where old files went, too expensive
138 # we currently don't try to find where old files went, too expensive
140 # this means we can miss a case like 'hg rm b; hg cp a b'
139 # this means we can miss a case like 'hg rm b; hg cp a b'
141 cm = {}
140 cm = {}
142
141
143 # Computing the forward missing is quite expensive on large manifests, since
142 # Computing the forward missing is quite expensive on large manifests, since
144 # it compares the entire manifests. We can optimize it in the common use
143 # it compares the entire manifests. We can optimize it in the common use
145 # case of computing what copies are in a commit versus its parent (like
144 # case of computing what copies are in a commit versus its parent (like
146 # during a rebase or histedit). Note, we exclude merge commits from this
145 # during a rebase or histedit). Note, we exclude merge commits from this
147 # optimization, since the ctx.files() for a merge commit is not correct for
146 # optimization, since the ctx.files() for a merge commit is not correct for
148 # this comparison.
147 # this comparison.
149 forwardmissingmatch = match
148 forwardmissingmatch = match
150 if b.p1() == a and b.p2().node() == node.nullid:
149 if b.p1() == a and b.p2().node() == node.nullid:
151 filesmatcher = matchmod.exact(b.files())
150 filesmatcher = matchmod.exact(b.files())
152 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
151 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
153 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
152 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
154
153
155 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
154 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
156
155
157 if debug:
156 if debug:
158 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
157 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
159
158
160 for f in sorted(missing):
159 for f in sorted(missing):
161 if debug:
160 if debug:
162 dbg(b'debug.copies: tracing file: %s\n' % f)
161 dbg(b'debug.copies: tracing file: %s\n' % f)
163 fctx = b[f]
162 fctx = b[f]
164 fctx._ancestrycontext = ancestrycontext
163 fctx._ancestrycontext = ancestrycontext
165
164
166 if debug:
165 if debug:
167 start = util.timer()
166 start = util.timer()
168 opath = _tracefile(fctx, am, basemf)
167 opath = _tracefile(fctx, am, basemf)
169 if opath:
168 if opath:
170 if debug:
169 if debug:
171 dbg(b'debug.copies: rename of: %s\n' % opath)
170 dbg(b'debug.copies: rename of: %s\n' % opath)
172 cm[f] = opath
171 cm[f] = opath
173 if debug:
172 if debug:
174 dbg(
173 dbg(
175 b'debug.copies: time: %f seconds\n'
174 b'debug.copies: time: %f seconds\n'
176 % (util.timer() - start)
175 % (util.timer() - start)
177 )
176 )
178 return cm
177 return cm
179
178
180
179
181 def _revinfogetter(repo):
180 def _revinfogetter(repo):
182 """return a function that return multiple data given a <rev>"i
181 """return a function that return multiple data given a <rev>"i
183
182
184 * p1: revision number of first parent
183 * p1: revision number of first parent
185 * p2: revision number of first parent
184 * p2: revision number of first parent
186 * p1copies: mapping of copies from p1
185 * p1copies: mapping of copies from p1
187 * p2copies: mapping of copies from p2
186 * p2copies: mapping of copies from p2
188 * removed: a list of removed files
187 * removed: a list of removed files
189 """
188 """
190 cl = repo.changelog
189 cl = repo.changelog
191 parents = cl.parentrevs
190 parents = cl.parentrevs
192
191
193 if repo.filecopiesmode == b'changeset-sidedata':
192 if repo.filecopiesmode == b'changeset-sidedata':
194 changelogrevision = cl.changelogrevision
193 changelogrevision = cl.changelogrevision
195 flags = cl.flags
194 flags = cl.flags
196
195
197 def revinfo(rev):
196 def revinfo(rev):
198 p1, p2 = parents(rev)
197 p1, p2 = parents(rev)
199 if flags(rev) & REVIDX_SIDEDATA:
198 if flags(rev) & REVIDX_SIDEDATA:
200 c = changelogrevision(rev)
199 c = changelogrevision(rev)
201 p1copies = c.p1copies
200 p1copies = c.p1copies
202 p2copies = c.p2copies
201 p2copies = c.p2copies
203 removed = c.filesremoved
202 removed = c.filesremoved
204 else:
203 else:
205 p1copies = {}
204 p1copies = {}
206 p2copies = {}
205 p2copies = {}
207 removed = ()
206 removed = ()
208 return p1, p2, p1copies, p2copies, removed
207 return p1, p2, p1copies, p2copies, removed
209
208
210 else:
209 else:
211
210
212 def revinfo(rev):
211 def revinfo(rev):
213 p1, p2 = parents(rev)
212 p1, p2 = parents(rev)
214 ctx = repo[rev]
213 ctx = repo[rev]
215 p1copies, p2copies = ctx._copies
214 p1copies, p2copies = ctx._copies
216 removed = ctx.filesremoved()
215 removed = ctx.filesremoved()
217 return p1, p2, p1copies, p2copies, removed
216 return p1, p2, p1copies, p2copies, removed
218
217
219 return revinfo
218 return revinfo
220
219
221
220
222 def _changesetforwardcopies(a, b, match):
221 def _changesetforwardcopies(a, b, match):
223 if a.rev() in (node.nullrev, b.rev()):
222 if a.rev() in (node.nullrev, b.rev()):
224 return {}
223 return {}
225
224
226 repo = a.repo().unfiltered()
225 repo = a.repo().unfiltered()
227 children = {}
226 children = {}
228 revinfo = _revinfogetter(repo)
227 revinfo = _revinfogetter(repo)
229
228
230 cl = repo.changelog
229 cl = repo.changelog
231 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
230 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
231 mrset = set(missingrevs)
232 roots = set()
232 for r in missingrevs:
233 for r in missingrevs:
233 for p in cl.parentrevs(r):
234 for p in cl.parentrevs(r):
234 if p == node.nullrev:
235 if p == node.nullrev:
235 continue
236 continue
236 if p not in children:
237 if p not in children:
237 children[p] = [r]
238 children[p] = [r]
238 else:
239 else:
239 children[p].append(r)
240 children[p].append(r)
241 if p not in mrset:
242 roots.add(p)
243 if not roots:
244 # no common revision to track copies from
245 return {}
246 min_root = min(roots)
240
247
241 roots = set(children) - set(missingrevs)
248 from_head = set(
242 work = list(roots)
249 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
250 )
251
252 iterrevs = set(from_head)
253 iterrevs &= mrset
254 iterrevs.update(roots)
255 iterrevs.remove(b.rev())
243 all_copies = {r: {} for r in roots}
256 all_copies = {r: {} for r in roots}
244 heapq.heapify(work)
245 alwaysmatch = match.always()
257 alwaysmatch = match.always()
246 while work:
258 for r in sorted(iterrevs):
247 r = heapq.heappop(work)
248 copies = all_copies.pop(r)
259 copies = all_copies.pop(r)
249 if r == b.rev():
250 return copies
251 for i, c in enumerate(children[r]):
260 for i, c in enumerate(children[r]):
252 p1, p2, p1copies, p2copies, removed = revinfo(c)
261 p1, p2, p1copies, p2copies, removed = revinfo(c)
253 if r == p1:
262 if r == p1:
254 parent = 1
263 parent = 1
255 childcopies = p1copies
264 childcopies = p1copies
256 else:
265 else:
257 assert r == p2
266 assert r == p2
258 parent = 2
267 parent = 2
259 childcopies = p2copies
268 childcopies = p2copies
260 if not alwaysmatch:
269 if not alwaysmatch:
261 childcopies = {
270 childcopies = {
262 dst: src for dst, src in childcopies.items() if match(dst)
271 dst: src for dst, src in childcopies.items() if match(dst)
263 }
272 }
264 # Copy the dict only if later iterations will also need it
273 # Copy the dict only if later iterations will also need it
265 if i != len(children[r]) - 1:
274 if i != len(children[r]) - 1:
266 newcopies = copies.copy()
275 newcopies = copies.copy()
267 else:
276 else:
268 newcopies = copies
277 newcopies = copies
269 if childcopies:
278 if childcopies:
270 newcopies = _chain(newcopies, childcopies)
279 newcopies = _chain(newcopies, childcopies)
271 for f in removed:
280 for f in removed:
272 if f in newcopies:
281 if f in newcopies:
273 del newcopies[f]
282 del newcopies[f]
274 othercopies = all_copies.get(c)
283 othercopies = all_copies.get(c)
275 if othercopies is None:
284 if othercopies is None:
276 heapq.heappush(work, c)
277 all_copies[c] = newcopies
285 all_copies[c] = newcopies
278 else:
286 else:
279 # we are the second parent to work on c, we need to merge our
287 # we are the second parent to work on c, we need to merge our
280 # work with the other.
288 # work with the other.
281 #
289 #
282 # Unlike when copies are stored in the filelog, we consider
290 # Unlike when copies are stored in the filelog, we consider
283 # it a copy even if the destination already existed on the
291 # it a copy even if the destination already existed on the
284 # other branch. It's simply too expensive to check if the
292 # other branch. It's simply too expensive to check if the
285 # file existed in the manifest.
293 # file existed in the manifest.
286 #
294 #
287 # In case of conflict, parent 1 take precedence over parent 2.
295 # In case of conflict, parent 1 take precedence over parent 2.
288 # This is an arbitrary choice made anew when implementing
296 # This is an arbitrary choice made anew when implementing
289 # changeset based copies. It was made without regards with
297 # changeset based copies. It was made without regards with
290 # potential filelog related behavior.
298 # potential filelog related behavior.
291 if parent == 1:
299 if parent == 1:
292 othercopies.update(newcopies)
300 othercopies.update(newcopies)
293 else:
301 else:
294 newcopies.update(othercopies)
302 newcopies.update(othercopies)
295 all_copies[c] = newcopies
303 all_copies[c] = newcopies
296 assert False
304 return all_copies[b.rev()]
297
305
298
306
299 def _forwardcopies(a, b, base=None, match=None):
307 def _forwardcopies(a, b, base=None, match=None):
300 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
308 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
301
309
302 if base is None:
310 if base is None:
303 base = a
311 base = a
304 match = a.repo().narrowmatch(match)
312 match = a.repo().narrowmatch(match)
305 # check for working copy
313 # check for working copy
306 if b.rev() is None:
314 if b.rev() is None:
307 cm = _committedforwardcopies(a, b.p1(), base, match)
315 cm = _committedforwardcopies(a, b.p1(), base, match)
308 # combine copies from dirstate if necessary
316 # combine copies from dirstate if necessary
309 copies = _chain(cm, _dirstatecopies(b._repo, match))
317 copies = _chain(cm, _dirstatecopies(b._repo, match))
310 else:
318 else:
311 copies = _committedforwardcopies(a, b, base, match)
319 copies = _committedforwardcopies(a, b, base, match)
312 return copies
320 return copies
313
321
314
322
315 def _backwardrenames(a, b, match):
323 def _backwardrenames(a, b, match):
316 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
324 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
317 return {}
325 return {}
318
326
319 # Even though we're not taking copies into account, 1:n rename situations
327 # Even though we're not taking copies into account, 1:n rename situations
320 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
328 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
321 # arbitrarily pick one of the renames.
329 # arbitrarily pick one of the renames.
322 # We don't want to pass in "match" here, since that would filter
330 # We don't want to pass in "match" here, since that would filter
323 # the destination by it. Since we're reversing the copies, we want
331 # the destination by it. Since we're reversing the copies, we want
324 # to filter the source instead.
332 # to filter the source instead.
325 f = _forwardcopies(b, a)
333 f = _forwardcopies(b, a)
326 r = {}
334 r = {}
327 for k, v in sorted(pycompat.iteritems(f)):
335 for k, v in sorted(pycompat.iteritems(f)):
328 if match and not match(v):
336 if match and not match(v):
329 continue
337 continue
330 # remove copies
338 # remove copies
331 if v in a:
339 if v in a:
332 continue
340 continue
333 r[v] = k
341 r[v] = k
334 return r
342 return r
335
343
336
344
337 def pathcopies(x, y, match=None):
345 def pathcopies(x, y, match=None):
338 """find {dst@y: src@x} copy mapping for directed compare"""
346 """find {dst@y: src@x} copy mapping for directed compare"""
339 repo = x._repo
347 repo = x._repo
340 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
348 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
341 if debug:
349 if debug:
342 repo.ui.debug(
350 repo.ui.debug(
343 b'debug.copies: searching copies from %s to %s\n' % (x, y)
351 b'debug.copies: searching copies from %s to %s\n' % (x, y)
344 )
352 )
345 if x == y or not x or not y:
353 if x == y or not x or not y:
346 return {}
354 return {}
347 a = y.ancestor(x)
355 a = y.ancestor(x)
348 if a == x:
356 if a == x:
349 if debug:
357 if debug:
350 repo.ui.debug(b'debug.copies: search mode: forward\n')
358 repo.ui.debug(b'debug.copies: search mode: forward\n')
351 if y.rev() is None and x == y.p1():
359 if y.rev() is None and x == y.p1():
352 # short-circuit to avoid issues with merge states
360 # short-circuit to avoid issues with merge states
353 return _dirstatecopies(repo, match)
361 return _dirstatecopies(repo, match)
354 copies = _forwardcopies(x, y, match=match)
362 copies = _forwardcopies(x, y, match=match)
355 elif a == y:
363 elif a == y:
356 if debug:
364 if debug:
357 repo.ui.debug(b'debug.copies: search mode: backward\n')
365 repo.ui.debug(b'debug.copies: search mode: backward\n')
358 copies = _backwardrenames(x, y, match=match)
366 copies = _backwardrenames(x, y, match=match)
359 else:
367 else:
360 if debug:
368 if debug:
361 repo.ui.debug(b'debug.copies: search mode: combined\n')
369 repo.ui.debug(b'debug.copies: search mode: combined\n')
362 base = None
370 base = None
363 if a.rev() != node.nullrev:
371 if a.rev() != node.nullrev:
364 base = x
372 base = x
365 copies = _chain(
373 copies = _chain(
366 _backwardrenames(x, a, match=match),
374 _backwardrenames(x, a, match=match),
367 _forwardcopies(a, y, base, match=match),
375 _forwardcopies(a, y, base, match=match),
368 )
376 )
369 _filter(x, y, copies)
377 _filter(x, y, copies)
370 return copies
378 return copies
371
379
372
380
373 def mergecopies(repo, c1, c2, base):
381 def mergecopies(repo, c1, c2, base):
374 """
382 """
375 Finds moves and copies between context c1 and c2 that are relevant for
383 Finds moves and copies between context c1 and c2 that are relevant for
376 merging. 'base' will be used as the merge base.
384 merging. 'base' will be used as the merge base.
377
385
378 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
386 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
379 files that were moved/ copied in one merge parent and modified in another.
387 files that were moved/ copied in one merge parent and modified in another.
380 For example:
388 For example:
381
389
382 o ---> 4 another commit
390 o ---> 4 another commit
383 |
391 |
384 | o ---> 3 commit that modifies a.txt
392 | o ---> 3 commit that modifies a.txt
385 | /
393 | /
386 o / ---> 2 commit that moves a.txt to b.txt
394 o / ---> 2 commit that moves a.txt to b.txt
387 |/
395 |/
388 o ---> 1 merge base
396 o ---> 1 merge base
389
397
390 If we try to rebase revision 3 on revision 4, since there is no a.txt in
398 If we try to rebase revision 3 on revision 4, since there is no a.txt in
391 revision 4, and if user have copytrace disabled, we prints the following
399 revision 4, and if user have copytrace disabled, we prints the following
392 message:
400 message:
393
401
394 ```other changed <file> which local deleted```
402 ```other changed <file> which local deleted```
395
403
396 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
404 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
397 "dirmove".
405 "dirmove".
398
406
399 "copy" is a mapping from destination name -> source name,
407 "copy" is a mapping from destination name -> source name,
400 where source is in c1 and destination is in c2 or vice-versa.
408 where source is in c1 and destination is in c2 or vice-versa.
401
409
402 "movewithdir" is a mapping from source name -> destination name,
410 "movewithdir" is a mapping from source name -> destination name,
403 where the file at source present in one context but not the other
411 where the file at source present in one context but not the other
404 needs to be moved to destination by the merge process, because the
412 needs to be moved to destination by the merge process, because the
405 other context moved the directory it is in.
413 other context moved the directory it is in.
406
414
407 "diverge" is a mapping of source name -> list of destination names
415 "diverge" is a mapping of source name -> list of destination names
408 for divergent renames.
416 for divergent renames.
409
417
410 "renamedelete" is a mapping of source name -> list of destination
418 "renamedelete" is a mapping of source name -> list of destination
411 names for files deleted in c1 that were renamed in c2 or vice-versa.
419 names for files deleted in c1 that were renamed in c2 or vice-versa.
412
420
413 "dirmove" is a mapping of detected source dir -> destination dir renames.
421 "dirmove" is a mapping of detected source dir -> destination dir renames.
414 This is needed for handling changes to new files previously grafted into
422 This is needed for handling changes to new files previously grafted into
415 renamed directories.
423 renamed directories.
416
424
417 This function calls different copytracing algorithms based on config.
425 This function calls different copytracing algorithms based on config.
418 """
426 """
419 # avoid silly behavior for update from empty dir
427 # avoid silly behavior for update from empty dir
420 if not c1 or not c2 or c1 == c2:
428 if not c1 or not c2 or c1 == c2:
421 return {}, {}, {}, {}, {}
429 return {}, {}, {}, {}, {}
422
430
423 narrowmatch = c1.repo().narrowmatch()
431 narrowmatch = c1.repo().narrowmatch()
424
432
425 # avoid silly behavior for parent -> working dir
433 # avoid silly behavior for parent -> working dir
426 if c2.node() is None and c1.node() == repo.dirstate.p1():
434 if c2.node() is None and c1.node() == repo.dirstate.p1():
427 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
435 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
428
436
429 copytracing = repo.ui.config(b'experimental', b'copytrace')
437 copytracing = repo.ui.config(b'experimental', b'copytrace')
430 if stringutil.parsebool(copytracing) is False:
438 if stringutil.parsebool(copytracing) is False:
431 # stringutil.parsebool() returns None when it is unable to parse the
439 # stringutil.parsebool() returns None when it is unable to parse the
432 # value, so we should rely on making sure copytracing is on such cases
440 # value, so we should rely on making sure copytracing is on such cases
433 return {}, {}, {}, {}, {}
441 return {}, {}, {}, {}, {}
434
442
435 if usechangesetcentricalgo(repo):
443 if usechangesetcentricalgo(repo):
436 # The heuristics don't make sense when we need changeset-centric algos
444 # The heuristics don't make sense when we need changeset-centric algos
437 return _fullcopytracing(repo, c1, c2, base)
445 return _fullcopytracing(repo, c1, c2, base)
438
446
439 # Copy trace disabling is explicitly below the node == p1 logic above
447 # Copy trace disabling is explicitly below the node == p1 logic above
440 # because the logic above is required for a simple copy to be kept across a
448 # because the logic above is required for a simple copy to be kept across a
441 # rebase.
449 # rebase.
442 if copytracing == b'heuristics':
450 if copytracing == b'heuristics':
443 # Do full copytracing if only non-public revisions are involved as
451 # Do full copytracing if only non-public revisions are involved as
444 # that will be fast enough and will also cover the copies which could
452 # that will be fast enough and will also cover the copies which could
445 # be missed by heuristics
453 # be missed by heuristics
446 if _isfullcopytraceable(repo, c1, base):
454 if _isfullcopytraceable(repo, c1, base):
447 return _fullcopytracing(repo, c1, c2, base)
455 return _fullcopytracing(repo, c1, c2, base)
448 return _heuristicscopytracing(repo, c1, c2, base)
456 return _heuristicscopytracing(repo, c1, c2, base)
449 else:
457 else:
450 return _fullcopytracing(repo, c1, c2, base)
458 return _fullcopytracing(repo, c1, c2, base)
451
459
452
460
453 def _isfullcopytraceable(repo, c1, base):
461 def _isfullcopytraceable(repo, c1, base):
454 """ Checks that if base, source and destination are all no-public branches,
462 """ Checks that if base, source and destination are all no-public branches,
455 if yes let's use the full copytrace algorithm for increased capabilities
463 if yes let's use the full copytrace algorithm for increased capabilities
456 since it will be fast enough.
464 since it will be fast enough.
457
465
458 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
466 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
459 number of changesets from c1 to base such that if number of changesets are
467 number of changesets from c1 to base such that if number of changesets are
460 more than the limit, full copytracing algorithm won't be used.
468 more than the limit, full copytracing algorithm won't be used.
461 """
469 """
462 if c1.rev() is None:
470 if c1.rev() is None:
463 c1 = c1.p1()
471 c1 = c1.p1()
464 if c1.mutable() and base.mutable():
472 if c1.mutable() and base.mutable():
465 sourcecommitlimit = repo.ui.configint(
473 sourcecommitlimit = repo.ui.configint(
466 b'experimental', b'copytrace.sourcecommitlimit'
474 b'experimental', b'copytrace.sourcecommitlimit'
467 )
475 )
468 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
476 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
469 return commits < sourcecommitlimit
477 return commits < sourcecommitlimit
470 return False
478 return False
471
479
472
480
473 def _checksinglesidecopies(
481 def _checksinglesidecopies(
474 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
482 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
475 ):
483 ):
476 if src not in m2:
484 if src not in m2:
477 # deleted on side 2
485 # deleted on side 2
478 if src not in m1:
486 if src not in m1:
479 # renamed on side 1, deleted on side 2
487 # renamed on side 1, deleted on side 2
480 renamedelete[src] = dsts1
488 renamedelete[src] = dsts1
481 elif m2[src] != mb[src]:
489 elif m2[src] != mb[src]:
482 if not _related(c2[src], base[src]):
490 if not _related(c2[src], base[src]):
483 return
491 return
484 # modified on side 2
492 # modified on side 2
485 for dst in dsts1:
493 for dst in dsts1:
486 if dst not in m2:
494 if dst not in m2:
487 # dst not added on side 2 (handle as regular
495 # dst not added on side 2 (handle as regular
488 # "both created" case in manifestmerge otherwise)
496 # "both created" case in manifestmerge otherwise)
489 copy[dst] = src
497 copy[dst] = src
490
498
491
499
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple ``(copy, movewithdir, diverge, renamedelete, dirmove)``
    where ``copy`` maps destination -> source for copies to merge,
    ``movewithdir`` maps new files to their location after a directory move,
    ``diverge`` maps a source to the several destinations it diverged into,
    ``renamedelete`` maps a source renamed on one side and deleted on the
    other to its rename destinations, and ``dirmove`` maps moved source
    directories to their destination directories.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # dst -> src copy maps from the base to each side of the merge
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert them into src -> [dst, ...] so each copy source can be
    # examined once with all of its destinations at hand
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # flat sets of divergent / rename+delete destinations; only used to
    # annotate the debug output below
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        # no copies at all: nothing further to compute
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b"   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so the prefix matching below cannot
    # confuse e.g. 'dir' with 'dir2'
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
656
664
657
665
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # working-directory contexts have rev() None; use their first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base and c2 (base excluded),
    # bailing out to the full algorithm if we hit a merge commit
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only the recorded copies whose source still exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index the files added on the c1 side by basename and by directory
        # so candidates sharing either with a missing file are cheap to find
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
765
773
766
774
767 def _related(f1, f2):
775 def _related(f1, f2):
768 """return True if f1 and f2 filectx have a common ancestor
776 """return True if f1 and f2 filectx have a common ancestor
769
777
770 Walk back to common ancestor to see if the two files originate
778 Walk back to common ancestor to see if the two files originate
771 from the same file. Since workingfilectx's rev() is None it messes
779 from the same file. Since workingfilectx's rev() is None it messes
772 up the integer comparison logic, hence the pre-step check for
780 up the integer comparison logic, hence the pre-step check for
773 None (f1 and f2 can only be workingfilectx's initially).
781 None (f1 and f2 can only be workingfilectx's initially).
774 """
782 """
775
783
776 if f1 == f2:
784 if f1 == f2:
777 return True # a match
785 return True # a match
778
786
779 g1, g2 = f1.ancestors(), f2.ancestors()
787 g1, g2 = f1.ancestors(), f2.ancestors()
780 try:
788 try:
781 f1r, f2r = f1.linkrev(), f2.linkrev()
789 f1r, f2r = f1.linkrev(), f2.linkrev()
782
790
783 if f1r is None:
791 if f1r is None:
784 f1 = next(g1)
792 f1 = next(g1)
785 if f2r is None:
793 if f2r is None:
786 f2 = next(g2)
794 f2 = next(g2)
787
795
788 while True:
796 while True:
789 f1r, f2r = f1.linkrev(), f2.linkrev()
797 f1r, f2r = f1.linkrev(), f2.linkrev()
790 if f1r > f2r:
798 if f1r > f2r:
791 f1 = next(g1)
799 f1 = next(g1)
792 elif f2r > f1r:
800 elif f2r > f1r:
793 f2 = next(g2)
801 f2 = next(g2)
794 else: # f1 and f2 point to files in the same linkrev
802 else: # f1 and f2 point to files in the same linkrev
795 return f1 == f2 # true if they point to the same file
803 return f1 == f2 # true if they point to the same file
796 except StopIteration:
804 except StopIteration:
797 return False
805 return False
798
806
799
807
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    # only compute the (expensive) exclusion set when copytracing is
    # enabled: 'heuristics', an explicit truthy value, or unset
    if skiprev is not None and (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    ):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst in exclude:
            continue
        # only mark destinations that actually exist in the working context
        if dst in wctx:
            wctx[dst].markcopied(src)
824
832
825
833
def computechangesetfilesadded(ctx):
    """Return the list of files added in changeset ``ctx``.

    A touched file counts as added when no parent of the changeset
    contains it.
    """
    return [
        f
        for f in ctx.files()
        if not any(f in p for p in ctx.parents())
    ]
834
842
835
843
def computechangesetfilesremoved(ctx):
    """Return the list of files removed in changeset ``ctx``.

    ``ctx.files()`` lists every file touched by the changeset; those no
    longer present in the changeset itself are the removals.
    """
    return [f for f in ctx.files() if f not in ctx]
844
852
845
853
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries (p1copies, p2copies).

    Each dictionary is in the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    # respect narrow clones: skip files outside the narrowspec
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        # attribute the copy to whichever parent actually holds the source
        # at the recorded file node
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies
870
878
871
879
def encodecopies(files, copies):
    """Encode a ``{dst: src}`` copy dict against the ``files`` list.

    Each entry becomes ``<index-of-dst>\\0<src>`` and entries are joined
    with newlines.  Raises ProgrammingError when a destination in
    ``copies`` does not appear in ``files``.
    """
    entries = [
        b'%d\0%s' % (idx, copies[name])
        for idx, name in enumerate(files)
        if name in copies
    ]
    if len(entries) != len(copies):
        # some destination never showed up while scanning ``files``
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(entries)
882
890
883
891
def decodecopies(files, data):
    """Decode ``encodecopies`` output back into a ``{dst: src}`` dict.

    Returns None when the payload does not parse, e.g. when an extension
    used the same key (such as "p1copies") with a different value syntax.
    """
    if not data:
        return {}
    try:
        decoded = {}
        for line in data.split(b'\n'):
            strindex, src = line.split(b'\0')
            decoded[files[int(strindex)]] = src
        return decoded
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
899
907
900
908
def encodefileindices(files, subset):
    """Encode ``subset`` as the newline-joined indices of its members
    within ``files``.
    """
    members = set(subset)
    return b'\n'.join(
        b'%d' % idx for idx, name in enumerate(files) if name in members
    )
908
916
909
917
def decodefileindices(files, data):
    """Decode ``encodefileindices`` output back into a list of file names.

    Returns None on malformed or out-of-range input, e.g. when an
    extension used the same key (such as "added") with different syntax.
    """
    if not data:
        return []
    try:
        names = []
        for token in data.split(b'\n'):
            idx = int(token)
            if not 0 <= idx < len(files):
                return None
            names.append(files[idx])
        return names
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
925
933
926
934
def _getsidedata(srcrepo, rev):
    """Compute the copy-tracing sidedata entries for revision ``rev``.

    Returns a dict mapping sidedata keys (SD_P1COPIES, SD_P2COPIES,
    SD_FILESADDED, SD_FILESREMOVED) to their encoded values; keys with
    empty payloads are omitted.
    """
    ctx = srcrepo[rev]
    p1copies, p2copies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    # The previous test was ``any([filescopies, filesadded, filesremoved])``
    # with ``filescopies`` being the (p1copies, p2copies) 2-tuple; a
    # non-empty tuple is always truthy, so the guard never skipped the
    # encoding work.  Testing the individual containers restores the
    # intended short-circuit; the returned dict is unchanged either way
    # since empty encodings were already filtered out below.
    if any([p1copies, p2copies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
949
957
950
958
def getsidedataadder(srcrepo, destrepo):
    """Return a sidedata companion that injects copy sidedata.

    The companion computes copies/added/removed sidedata for changelog
    revisions and leaves other revlogs untouched.
    """

    def sidedatacompanion(revlog, rev):
        # only the changelog (recognized by its ``filteredrevs``
        # attribute) carries copy sidedata
        if not util.safehasattr(revlog, 'filteredrevs'):
            return False, (), {}
        return False, (), _getsidedata(srcrepo, rev)

    return sidedatacompanion
959
967
960
968
def getsidedataremover(srcrepo, destrepo):
    """Return a sidedata companion that strips copy sidedata entries."""

    def sidedatacompanion(revlog, rev):
        removed = ()
        # only changelog revisions (detected via ``filteredrevs``) with
        # the sidedata flag set can carry copy information to strip
        if util.safehasattr(revlog, 'filteredrevs'):
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                removed = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, removed, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now