upstream/mercurial-mirror Commit - r47395:324ded1a

1

# coding: utf8

1

# coding: utf8

2

# copies.py - copy detection for Mercurial

2

# copies.py - copy detection for Mercurial

3

#

3

#

4

5

#

5

#

6

# This software may be used and distributed according to the terms of the

6

# This software may be used and distributed according to the terms of the

7

# GNU General Public License version 2 or any later version.

7

# GNU General Public License version 2 or any later version.

8

9

from __future__ import absolute_import

9

from __future__ import absolute_import

10

11

import collections

11

import collections

12

import os

12

import os

13

14

from .i18n import _

14

from .i18n import _

15

from .node import (

15

from .node import (

16

nullid,

16

nullid,

17

nullrev,

17

nullrev,

18

)

18

)

19

20

from . import (

20

from . import (

21

match as matchmod,

21

match as matchmod,

22

pathutil,

22

pathutil,

23

policy,

23

policy,

24

pycompat,

24

pycompat,

25

util,

25

util,

26

)

26

)

27

28

29

from .utils import stringutil

29

from .utils import stringutil

30

31

from .revlogutils import (

31

from .revlogutils import (

32

flagutil,

32

flagutil,

33

sidedata as sidedatamod,

33

sidedata as sidedatamod,

34

)

34

)

35

36

rustmod = policy.importrust("copy_tracing")

36

rustmod = policy.importrust("copy_tracing")

37

38

39

def _filter(src, dst, t):

39

def _filter(src, dst, t):

40

"""filters out invalid copies after chaining"""

40

"""filters out invalid copies after chaining"""

41

42

# When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')

42

# When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')

43

# with copies in 'b' (from 'mid' to 'dst'), we can get the different cases

43

# with copies in 'b' (from 'mid' to 'dst'), we can get the different cases

44

# in the following table (not including trivial cases). For example, case 6

44

# in the following table (not including trivial cases). For example, case 6

45

# is where a file existed in 'src' and remained under that name in 'mid' and

45

# is where a file existed in 'src' and remained under that name in 'mid' and

46

# then was renamed between 'mid' and 'dst'.

46

# then was renamed between 'mid' and 'dst'.

47

#

47

#

48

# case src mid dst result

48

# case src mid dst result

49

# 1 x y - -

49

# 1 x y - -

50

# 2 x y y x->y

50

# 2 x y y x->y

51

# 3 x y x -

51

# 3 x y x -

52

# 4 x y z x->z

52

# 4 x y z x->z

53

# 5 - x y -

53

# 5 - x y -

54

# 6 x x y x->y

54

# 6 x x y x->y

55

#

55

#

56

# _chain() takes care of chaining the copies in 'a' and 'b', but it

56

# _chain() takes care of chaining the copies in 'a' and 'b', but it

57

# cannot tell the difference between cases 1 and 2, between 3 and 4, or

57

# cannot tell the difference between cases 1 and 2, between 3 and 4, or

58

# between 5 and 6, so it includes all cases in its result.

58

# between 5 and 6, so it includes all cases in its result.

59

# Cases 1, 3, and 5 are then removed by _filter().

59

# Cases 1, 3, and 5 are then removed by _filter().

60

61

for k, v in list(t.items()):

61

for k, v in list(t.items()):

62

if k == v: # case 3

62

if k == v: # case 3

63

del t[k]

63

del t[k]

64

elif v not in src: # case 5

64

elif v not in src: # case 5

65

# remove copies from files that didn't exist

65

# remove copies from files that didn't exist

66

del t[k]

66

del t[k]

67

elif k not in dst: # case 1

67

elif k not in dst: # case 1

68

# remove copies to files that were then removed

68

# remove copies to files that were then removed

69

del t[k]

69

del t[k]

70

71

72

def _chain(prefix, suffix):

72

def _chain(prefix, suffix):

73

"""chain two sets of copies 'prefix' and 'suffix'"""

73

"""chain two sets of copies 'prefix' and 'suffix'"""

74

result = prefix.copy()

74

result = prefix.copy()

75

for key, value in pycompat.iteritems(suffix):

75

for key, value in pycompat.iteritems(suffix):

76

result[key] = prefix.get(value, value)

76

result[key] = prefix.get(value, value)

77

return result

77

return result

78

79

80

def _tracefile(fctx, am, basemf):

80

def _tracefile(fctx, am, basemf):

81

"""return file context that is the ancestor of fctx present in ancestor

81

"""return file context that is the ancestor of fctx present in ancestor

82

manifest am

82

manifest am

83

84

Note: we used to try and stop after a given limit, however checking if that

84

Note: we used to try and stop after a given limit, however checking if that

85

limit is reached turned out to be very expensive. we are better off

85

limit is reached turned out to be very expensive. we are better off

86

disabling that feature."""

86

disabling that feature."""

87

88

for f in fctx.ancestors():

88

for f in fctx.ancestors():

89

path = f.path()

89

path = f.path()

90

if am.get(path, None) == f.filenode():

90

if am.get(path, None) == f.filenode():

91

return path

91

return path

92

if basemf and basemf.get(path, None) == f.filenode():

92

if basemf and basemf.get(path, None) == f.filenode():

93

return path

93

return path

94

95

96

def _dirstatecopies(repo, match=None):

96

def _dirstatecopies(repo, match=None):

97

ds = repo.dirstate

97

ds = repo.dirstate

98

c = ds.copies().copy()

98

c = ds.copies().copy()

99

for k in list(c):

99

for k in list(c):

100

if ds[k] not in b'anm' or (match and not match(k)):

100

if ds[k] not in b'anm' or (match and not match(k)):

101

del c[k]

101

del c[k]

102

return c

102

return c

103

104

105

def _computeforwardmissing(a, b, match=None):

105

def _computeforwardmissing(a, b, match=None):

106

"""Computes which files are in b but not a.

106

"""Computes which files are in b but not a.

107

This is its own function so extensions can easily wrap this call to see what

107

This is its own function so extensions can easily wrap this call to see what

108

files _forwardcopies is about to process.

108

files _forwardcopies is about to process.

109

"""

109

"""

110

ma = a.manifest()

110

ma = a.manifest()

111

mb = b.manifest()

111

mb = b.manifest()

112

return mb.filesnotin(ma, match=match)

112

return mb.filesnotin(ma, match=match)

113

114

115

def usechangesetcentricalgo(repo):

115

def usechangesetcentricalgo(repo):

116

"""Checks if we should use changeset-centric copy algorithms"""

116

"""Checks if we should use changeset-centric copy algorithms"""

117

if repo.filecopiesmode == b'changeset-sidedata':

117

if repo.filecopiesmode == b'changeset-sidedata':

118

return True

118

return True

119

readfrom = repo.ui.config(b'experimental', b'copies.read-from')

119

readfrom = repo.ui.config(b'experimental', b'copies.read-from')

120

changesetsource = (b'changeset-only', b'compatibility')

120

changesetsource = (b'changeset-only', b'compatibility')

121

return readfrom in changesetsource

121

return readfrom in changesetsource

122

123

124

def _committedforwardcopies(a, b, base, match):

124

def _committedforwardcopies(a, b, base, match):

125

"""Like _forwardcopies(), but b.rev() cannot be None (working copy)"""

125

"""Like _forwardcopies(), but b.rev() cannot be None (working copy)"""

126

# files might have to be traced back to the fctx parent of the last

126

# files might have to be traced back to the fctx parent of the last

127

# one-side-only changeset, but not further back than that

127

# one-side-only changeset, but not further back than that

128

repo = a._repo

128

repo = a._repo

129

130

if usechangesetcentricalgo(repo):

130

if usechangesetcentricalgo(repo):

131

return _changesetforwardcopies(a, b, match)

131

return _changesetforwardcopies(a, b, match)

132

133

debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

133

debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

134

dbg = repo.ui.debug

134

dbg = repo.ui.debug

135

if debug:

135

if debug:

136

dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))

136

dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))

137

am = a.manifest()

137

am = a.manifest()

138

basemf = None if base is None else base.manifest()

138

basemf = None if base is None else base.manifest()

139

140

# find where new files came from

140

# find where new files came from

141

# we currently don't try to find where old files went, too expensive

141

# we currently don't try to find where old files went, too expensive

142

# this means we can miss a case like 'hg rm b; hg cp a b'

142

# this means we can miss a case like 'hg rm b; hg cp a b'

143

cm = {}

143

cm = {}

144

145

# Computing the forward missing is quite expensive on large manifests, since

145

# Computing the forward missing is quite expensive on large manifests, since

146

# it compares the entire manifests. We can optimize it in the common use

146

# it compares the entire manifests. We can optimize it in the common use

147

# case of computing what copies are in a commit versus its parent (like

147

# case of computing what copies are in a commit versus its parent (like

148

# during a rebase or histedit). Note, we exclude merge commits from this

148

# during a rebase or histedit). Note, we exclude merge commits from this

149

# optimization, since the ctx.files() for a merge commit is not correct for

149

# optimization, since the ctx.files() for a merge commit is not correct for

150

# this comparison.

150

# this comparison.

151

forwardmissingmatch = match

151

forwardmissingmatch = match

152

if b.p1() == a and b.p2().node() == nullid:

152

if b.p1() == a and b.p2().node() == nullid:

153

filesmatcher = matchmod.exact(b.files())

153

filesmatcher = matchmod.exact(b.files())

154

forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)

154

forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)

155

if repo.ui.configbool(b'devel', b'copy-tracing.trace-all-files'):

155

if repo.ui.configbool(b'devel', b'copy-tracing.trace-all-files'):

156

missing = list(b.walk(match))

156

missing = list(b.walk(match))

157

# _computeforwardmissing(a, b, match=forwardmissingmatch)

157

# _computeforwardmissing(a, b, match=forwardmissingmatch)

158

if debug:

158

if debug:

159

dbg(b'debug.copies: searching all files: %d\n' % len(missing))

159

dbg(b'debug.copies: searching all files: %d\n' % len(missing))

160

else:

160

else:

161

missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

161

missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

162

if debug:

162

if debug:

163

dbg(

163

dbg(

164

b'debug.copies: missing files to search: %d\n'

164

b'debug.copies: missing files to search: %d\n'

165

% len(missing)

165

% len(missing)

166

)

166

)

167

168

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

168

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

169

170

for f in sorted(missing):

170

for f in sorted(missing):

171

if debug:

171

if debug:

172

dbg(b'debug.copies: tracing file: %s\n' % f)

172

dbg(b'debug.copies: tracing file: %s\n' % f)

173

fctx = b[f]

173

fctx = b[f]

174

fctx._ancestrycontext = ancestrycontext

174

fctx._ancestrycontext = ancestrycontext

175

176

if debug:

176

if debug:

177

start = util.timer()

177

start = util.timer()

178

opath = _tracefile(fctx, am, basemf)

178

opath = _tracefile(fctx, am, basemf)

179

if opath:

179

if opath:

180

if debug:

180

if debug:

181

dbg(b'debug.copies: rename of: %s\n' % opath)

181

dbg(b'debug.copies: rename of: %s\n' % opath)

182

cm[f] = opath

182

cm[f] = opath

183

if debug:

183

if debug:

184

dbg(

184

dbg(

185

b'debug.copies: time: %f seconds\n'

185

b'debug.copies: time: %f seconds\n'

186

% (util.timer() - start)

186

% (util.timer() - start)

187

)

187

)

188

return cm

188

return cm

189

190

191

def _revinfo_getter(repo, match):

191

def _revinfo_getter(repo, match):

192

"""returns a function that returns the following data given a <rev>"

192

"""returns a function that returns the following data given a <rev>"

193

194

* p1: revision number of first parent

194

* p1: revision number of first parent

195

* p2: revision number of first parent

195

* p2: revision number of first parent

196

* changes: a ChangingFiles object

196

* changes: a ChangingFiles object

197

"""

197

"""

198

cl = repo.changelog

198

cl = repo.changelog

199

parents = cl.parentrevs

199

parents = cl.parentrevs

200

flags = cl.flags

200

flags = cl.flags

201

202

HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

202

HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

203

204

changelogrevision = cl.changelogrevision

204

changelogrevision = cl.changelogrevision

205

206

if rustmod is not None:

206

if rustmod is not None:

207

208

def revinfo(rev):

208

def revinfo(rev):

209

p1, p2 = parents(rev)

209

p1, p2 = parents(rev)

210

if flags(rev) & HASCOPIESINFO:

210

if flags(rev) & HASCOPIESINFO:

211

raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)

211

raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)

212

else:

212

else:

213

raw = None

213

raw = None

214

return (p1, p2, raw)

214

return (p1, p2, raw)

215

216

else:

216

else:

217

218

def revinfo(rev):

218

def revinfo(rev):

219

p1, p2 = parents(rev)

219

p1, p2 = parents(rev)

220

if flags(rev) & HASCOPIESINFO:

220

if flags(rev) & HASCOPIESINFO:

221

changes = changelogrevision(rev).changes

221

changes = changelogrevision(rev).changes

222

else:

222

else:

223

changes = None

223

changes = None

224

return (p1, p2, changes)

224

return (p1, p2, changes)

225

226

return revinfo

226

return revinfo

227

228

229

def cached_is_ancestor(is_ancestor):

229

def cached_is_ancestor(is_ancestor):

230

"""return a cached version of is_ancestor"""

230

"""return a cached version of is_ancestor"""

231

cache = {}

231

cache = {}

232

233

def _is_ancestor(anc, desc):

233

def _is_ancestor(anc, desc):

234

if anc > desc:

234

if anc > desc:

235

return False

235

return False

236

elif anc == desc:

236

elif anc == desc:

237

return True

237

return True

238

key = (anc, desc)

238

key = (anc, desc)

239

ret = cache.get(key)

239

ret = cache.get(key)

240

if ret is None:

240

if ret is None:

241

ret = cache[key] = is_ancestor(anc, desc)

241

ret = cache[key] = is_ancestor(anc, desc)

242

return ret

242

return ret

243

244

return _is_ancestor

244

return _is_ancestor

245

246

247

def _changesetforwardcopies(a, b, match):

247

def _changesetforwardcopies(a, b, match):

248

if a.rev() in (nullrev, b.rev()):

248

if a.rev() in (nullrev, b.rev()):

249

return {}

249

return {}

250

251

repo = a.repo().unfiltered()

251

repo = a.repo().unfiltered()

252

children = {}

252

children = {}

253

254

cl = repo.changelog

254

cl = repo.changelog

255

isancestor = cl.isancestorrev

255

isancestor = cl.isancestorrev

256

257

# To track rename from "A" to B, we need to gather all parent → children

257

# To track rename from "A" to B, we need to gather all parent → children

258

# edges that are contains in `::B` but not in `::A`.

258

# edges that are contains in `::B` but not in `::A`.

259

#

259

#

260

#

260

#

261

# To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -

261

# To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -

262

# ::a`) and also all the "roots point", ie the parents of the exclusive set

262

# ::a`) and also all the "roots point", ie the parents of the exclusive set

263

# that belong to ::a. These are exactly all the revisions needed to express

263

# that belong to ::a. These are exactly all the revisions needed to express

264

# the parent → children we need to combine.

264

# the parent → children we need to combine.

265

#

265

#

266

# [1] actually, we need to gather all the edges within `(::a)::b`, ie:

266

# [1] actually, we need to gather all the edges within `(::a)::b`, ie:

267

# excluding paths that leads to roots that are not ancestors of `a`. We

267

# excluding paths that leads to roots that are not ancestors of `a`. We

268

# keep this out of the explanation because it is hard enough without this special case..

268

# keep this out of the explanation because it is hard enough without this special case..

269

270

parents = cl._uncheckedparentrevs

270

parents = cl._uncheckedparentrevs

271

graph_roots = (nullrev, nullrev)

271

graph_roots = (nullrev, nullrev)

272

273

ancestors = cl.ancestors([a.rev()], inclusive=True)

273

ancestors = cl.ancestors([a.rev()], inclusive=True)

274

revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])

274

revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])

275

roots = set()

275

roots = set()

276

has_graph_roots = False

276

has_graph_roots = False

277

multi_thread = repo.ui.configbool(b'devel', b'copy-tracing.multi-thread')

277

multi_thread = repo.ui.configbool(b'devel', b'copy-tracing.multi-thread')

278

279

# iterate over `only(B, A)`

279

# iterate over `only(B, A)`

280

for r in revs:

280

for r in revs:

281

ps = parents(r)

281

ps = parents(r)

282

if ps == graph_roots:

282

if ps == graph_roots:

283

has_graph_roots = True

283

has_graph_roots = True

284

else:

284

else:

285

p1, p2 = ps

285

p1, p2 = ps

286

287

# find all the "root points" (see larger comment above)

287

# find all the "root points" (see larger comment above)

288

if p1 != nullrev and p1 in ancestors:

288

if p1 != nullrev and p1 in ancestors:

289

roots.add(p1)

289

roots.add(p1)

290

if p2 != nullrev and p2 in ancestors:

290

if p2 != nullrev and p2 in ancestors:

291

roots.add(p2)

291

roots.add(p2)

292

if not roots:

292

if not roots:

293

# no common revision to track copies from

293

# no common revision to track copies from

294

return {}

294

return {}

295

if has_graph_roots:

295

if has_graph_roots:

296

# this deal with the special case mentionned in the [1] footnotes. We

296

# this deal with the special case mentionned in the [1] footnotes. We

297

# must filter out revisions that leads to non-common graphroots.

297

# must filter out revisions that leads to non-common graphroots.

298

roots = list(roots)

298

roots = list(roots)

299

m = min(roots)

299

m = min(roots)

300

h = [b.rev()]

300

h = [b.rev()]

301

roots_to_head = cl.reachableroots(m, h, roots, includepath=True)

301

roots_to_head = cl.reachableroots(m, h, roots, includepath=True)

302

roots_to_head = set(roots_to_head)

302

roots_to_head = set(roots_to_head)

303

revs = [r for r in revs if r in roots_to_head]

303

revs = [r for r in revs if r in roots_to_head]

304

305

if repo.filecopiesmode == b'changeset-sidedata':

305

if repo.filecopiesmode == b'changeset-sidedata':

306

# When using side-data, we will process the edges "from" the children.

306

# When using side-data, we will process the edges "from" the children.

307

# We iterate over the childre, gathering previous collected data for

307

# We iterate over the childre, gathering previous collected data for

308

# the parents. Do know when the parents data is no longer necessary, we

308

# the parents. Do know when the parents data is no longer necessary, we

309

# keep a counter of how many children each revision has.

309

# keep a counter of how many children each revision has.

310

#

310

#

311

# An interresting property of `children_count` is that it only contains

311

# An interresting property of `children_count` is that it only contains

312

# revision that will be relevant for a edge of the graph. So if a

312

# revision that will be relevant for a edge of the graph. So if a

313

# children has parent not in `children_count`, that edges should not be

313

# children has parent not in `children_count`, that edges should not be

314

# processed.

314

# processed.

315

children_count = dict((r, 0) for r in roots)

315

children_count = dict((r, 0) for r in roots)

316

for r in revs:

316

for r in revs:

317

for p in cl.parentrevs(r):

317

for p in cl.parentrevs(r):

318

if p == nullrev:

318

if p == nullrev:

319

continue

319

continue

320

children_count[r] = 0

320

children_count[r] = 0

321

if p in children_count:

321

if p in children_count:

322

children_count[p] += 1

322

children_count[p] += 1

323

revinfo = _revinfo_getter(repo, match)

323

revinfo = _revinfo_getter(repo, match)

324

return _combine_changeset_copies(

324

return _combine_changeset_copies(

325

revs,

325

revs,

326

children_count,

326

children_count,

327

b.rev(),

327

b.rev(),

328

revinfo,

328

revinfo,

329

match,

329

match,

330

isancestor,

330

isancestor,

331

multi_thread,

331

multi_thread,

332

)

332

)

333

else:

333

else:

334

# When not using side-data, we will process the edges "from" the parent.

334

# When not using side-data, we will process the edges "from" the parent.

335

# so we need a full mapping of the parent -> children relation.

335

# so we need a full mapping of the parent -> children relation.

336

children = dict((r, []) for r in roots)

336

children = dict((r, []) for r in roots)

337

for r in revs:

337

for r in revs:

338

for p in cl.parentrevs(r):

338

for p in cl.parentrevs(r):

339

if p == nullrev:

339

if p == nullrev:

340

continue

340

continue

341

children[r] = []

341

children[r] = []

342

if p in children:

342

if p in children:

343

children[p].append(r)

343

children[p].append(r)

344

x = revs.pop()

344

x = revs.pop()

345

assert x == b.rev()

345

assert x == b.rev()

346

revs.extend(roots)

346

revs.extend(roots)

347

revs.sort()

347

revs.sort()

348

349

revinfo = _revinfo_getter_extra(repo)

349

revinfo = _revinfo_getter_extra(repo)

350

return _combine_changeset_copies_extra(

350

return _combine_changeset_copies_extra(

351

revs, children, b.rev(), revinfo, match, isancestor

351

revs, children, b.rev(), revinfo, match, isancestor

352

)

352

)

353

354

355

def _combine_changeset_copies(

355

def _combine_changeset_copies(

356

revs, children_count, targetrev, revinfo, match, isancestor, multi_thread

356

revs, children_count, targetrev, revinfo, match, isancestor, multi_thread

357

):

357

):

358

"""combine the copies information for each item of iterrevs

358

"""combine the copies information for each item of iterrevs

359

360

revs: sorted iterable of revision to visit

360

revs: sorted iterable of revision to visit

361

children_count: a {parent: <number-of-relevant-children>} mapping.

361

children_count: a {parent: <number-of-relevant-children>} mapping.

362

targetrev: the final copies destination revision (not in iterrevs)

362

targetrev: the final copies destination revision (not in iterrevs)

363

revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)

363

revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)

364

match: a matcher

364

match: a matcher

365

366

It returns the aggregated copies information for `targetrev`.

366

It returns the aggregated copies information for `targetrev`.

367

"""

367

"""

368

369

alwaysmatch = match.always()

369

alwaysmatch = match.always()

370

371

if rustmod is not None:

371

if rustmod is not None:

372

final_copies = rustmod.combine_changeset_copies(

372

final_copies = rustmod.combine_changeset_copies(

373

list(revs), children_count, targetrev, revinfo, multi_thread

373

list(revs), children_count, targetrev, revinfo, multi_thread

374

)

374

)

375

else:

375

else:

376

isancestor = cached_is_ancestor(isancestor)

376

isancestor = cached_is_ancestor(isancestor)

377

378

all_copies = {}

378

all_copies = {}

379

# iterate over all the "children" side of copy tracing "edge"

379

# iterate over all the "children" side of copy tracing "edge"

380

for current_rev in revs:

380

for current_rev in revs:

381

p1, p2, changes = revinfo(current_rev)

381

p1, p2, changes = revinfo(current_rev)

382

current_copies = None

382

current_copies = None

383

# iterate over all parents to chain the existing data with the

383

# iterate over all parents to chain the existing data with the

384

# data from the parent → child edge.

384

# data from the parent → child edge.

385

for parent, parent_rev in ((1, p1), (2, p2)):

385

for parent, parent_rev in ((1, p1), (2, p2)):

386

if parent_rev == nullrev:

386

if parent_rev == nullrev:

387

continue

387

continue

388

remaining_children = children_count.get(parent_rev)

388

remaining_children = children_count.get(parent_rev)

389

if remaining_children is None:

389

if remaining_children is None:

390

continue

390

continue

391

remaining_children -= 1

391

remaining_children -= 1

392

children_count[parent_rev] = remaining_children

392

children_count[parent_rev] = remaining_children

393

if remaining_children:

393

if remaining_children:

394

copies = all_copies.get(parent_rev, None)

394

copies = all_copies.get(parent_rev, None)

395

else:

395

else:

396

copies = all_copies.pop(parent_rev, None)

396

copies = all_copies.pop(parent_rev, None)

397

398

if copies is None:

398

if copies is None:

399

# this is a root

399

# this is a root

400

newcopies = copies = {}

400

newcopies = copies = {}

401

elif remaining_children:

401

elif remaining_children:

402

newcopies = copies.copy()

402

newcopies = copies.copy()

403

else:

403

else:

404

newcopies = copies

404

newcopies = copies

405

# chain the data in the edge with the existing data

405

# chain the data in the edge with the existing data

406

if changes is not None:

406

if changes is not None:

407

childcopies = {}

407

childcopies = {}

408

if parent == 1:

408

if parent == 1:

409

childcopies = changes.copied_from_p1

409

childcopies = changes.copied_from_p1

410

elif parent == 2:

410

elif parent == 2:

411

childcopies = changes.copied_from_p2

411

childcopies = changes.copied_from_p2

412

413

if childcopies:

413

if childcopies:

414

newcopies = copies.copy()

414

newcopies = copies.copy()

415

for dest, source in pycompat.iteritems(childcopies):

415

for dest, source in pycompat.iteritems(childcopies):

416

prev = copies.get(source)

416

prev = copies.get(source)

417

if prev is not None and prev[1] is not None:

417

if prev is not None and prev[1] is not None:

418

source = prev[1]

418

source = prev[1]

419

newcopies[dest] = (current_rev, source)

419

newcopies[dest] = (current_rev, source)

420

assert newcopies is not copies

420

assert newcopies is not copies

421

if changes.removed:

421

if changes.removed:

422

for f in changes.removed:

422

for f in changes.removed:

423

if f in newcopies:

423

if f in newcopies:

424

if newcopies is copies:

424

if newcopies is copies:

425

# copy on write to avoid affecting potential other

425

# copy on write to avoid affecting potential other

426

# branches. when there are no other branches, this

426

# branches. when there are no other branches, this

427

# could be avoided.

427

# could be avoided.

428

newcopies = copies.copy()

428

newcopies = copies.copy()

429

newcopies[f] = (current_rev, None)

429

newcopies[f] = (current_rev, None)

430

# check potential need to combine the data from another parent (for

430

# check potential need to combine the data from another parent (for

431

# that child). See comment below for details.

431

# that child). See comment below for details.

432

if current_copies is None:

432

if current_copies is None:

433

current_copies = newcopies

433

current_copies = newcopies

434

else:

434

else:

435

# we are the second parent to work on c, we need to merge our

435

# we are the second parent to work on c, we need to merge our

436

# work with the other.

436

# work with the other.

437

#

437

#

438

# In case of conflict, parent 1 take precedence over parent 2.

438

# In case of conflict, parent 1 take precedence over parent 2.

439

# This is an arbitrary choice made anew when implementing

439

# This is an arbitrary choice made anew when implementing

440

# changeset based copies. It was made without regards with

440

# changeset based copies. It was made without regards with

441

# potential filelog related behavior.

441

# potential filelog related behavior.

442

assert parent == 2

442

assert parent == 2

443

current_copies = _merge_copies_dict(

443

current_copies = _merge_copies_dict(

444

newcopies,

444

newcopies,

445

current_copies,

445

current_copies,

446

isancestor,

446

isancestor,

447

changes,

447

changes,

448

current_rev,

448

current_rev,

449

)

449

)

450

all_copies[current_rev] = current_copies

450

all_copies[current_rev] = current_copies

451

452

# filter out internal details and return a {dest: source mapping}

452

# filter out internal details and return a {dest: source mapping}

453

final_copies = {}

453

final_copies = {}

454

for dest, (tt, source) in all_copies[targetrev].items():

454

for dest, (tt, source) in all_copies[targetrev].items():

455

if source is not None:

455

if source is not None:

456

final_copies[dest] = source

456

final_copies[dest] = source

457

if not alwaysmatch:

457

if not alwaysmatch:

458

for filename in list(final_copies.keys()):

458

for filename in list(final_copies.keys()):

459

if not match(filename):

459

if not match(filename):

460

del final_copies[filename]

460

del final_copies[filename]

461

return final_copies

461

return final_copies

462

463

464

# constant to decide which side to pick with _merge_copies_dict

464

# constant to decide which side to pick with _merge_copies_dict

465

PICK_MINOR = 0

465

PICK_MINOR = 0

466

PICK_MAJOR = 1

466

PICK_MAJOR = 1

467

PICK_EITHER = 2

467

PICK_EITHER = 2

468

469

470

def _merge_copies_dict(minor, major, isancestor, changes, current_merge):

470

def _merge_copies_dict(minor, major, isancestor, changes, current_merge):

471

"""merge two copies-mapping together, minor and major

471

"""merge two copies-mapping together, minor and major

472

473

In case of conflict, value from "major" will be picked.

473

In case of conflict, value from "major" will be picked.

474

475

- `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an

475

- `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an

476

ancestors of `high_rev`,

476

ancestors of `high_rev`,

477

478

- `ismerged(path)`: callable return True if `path` have been merged in the

478

- `ismerged(path)`: callable return True if `path` have been merged in the

479

current revision,

479

current revision,

480

481

return the resulting dict (in practice, the "minor" object, updated)

481

return the resulting dict (in practice, the "minor" object, updated)

482

"""

482

"""

483

for dest, value in major.items():

483

for dest, value in major.items():

484

other = minor.get(dest)

484

other = minor.get(dest)

485

if other is None:

485

if other is None:

486

minor[dest] = value

486

minor[dest] = value

487

else:

487

else:

488

pick, overwrite = _compare_values(

488

pick, overwrite = _compare_values(

489

changes, isancestor, dest, other, value

489

changes, isancestor, dest, other, value

490

)

490

)

491

if overwrite:

491

if overwrite:

492

if pick == PICK_MAJOR:

492

if pick == PICK_MAJOR:

493

minor[dest] = (current_merge, value[1])

493

minor[dest] = (current_merge, value[1])

494

else:

494

else:

495

minor[dest] = (current_merge, other[1])

495

minor[dest] = (current_merge, other[1])

496

elif pick == PICK_MAJOR:

496

elif pick == PICK_MAJOR:

497

minor[dest] = value

497

minor[dest] = value

498

return minor

498

return minor

499

500

501

def _compare_values(changes, isancestor, dest, minor, major):

501

def _compare_values(changes, isancestor, dest, minor, major):

502

"""compare two value within a _merge_copies_dict loop iteration

502

"""compare two value within a _merge_copies_dict loop iteration

503

504

return (pick, overwrite).

504

return (pick, overwrite).

505

506

- pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER

506

- pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER

507

- overwrite is True if pick is a return of an ambiguity that needs resolution.

507

- overwrite is True if pick is a return of an ambiguity that needs resolution.

508

"""

508

"""

509

major_tt, major_value = major

509

major_tt, major_value = major

510

minor_tt, minor_value = minor

510

minor_tt, minor_value = minor

511

512

if major_tt == minor_tt:

512

if major_tt == minor_tt:

513

# if it comes from the same revision it must be the same value

513

# if it comes from the same revision it must be the same value

514

assert major_value == minor_value

514

assert major_value == minor_value

515

return PICK_EITHER, False

515

return PICK_EITHER, False

516

elif (

516

elif (

517

changes is not None

517

changes is not None

518

and minor_value is not None

518

and minor_value is not None

519

and major_value is None

519

and major_value is None

520

and dest in changes.salvaged

520

and dest in changes.salvaged

521

):

521

):

522

# In this case, a deletion was reverted, the "alive" value overwrite

522

# In this case, a deletion was reverted, the "alive" value overwrite

523

# the deleted one.

523

# the deleted one.

524

return PICK_MINOR, True

524

return PICK_MINOR, True

525

elif (

525

elif (

526

changes is not None

526

changes is not None

527

and major_value is not None

527

and major_value is not None

528

and minor_value is None

528

and minor_value is None

529

and dest in changes.salvaged

529

and dest in changes.salvaged

530

):

530

):

531

# In this case, a deletion was reverted, the "alive" value overwrite

531

# In this case, a deletion was reverted, the "alive" value overwrite

532

# the deleted one.

532

# the deleted one.

533

return PICK_MAJOR, True

533

return PICK_MAJOR, True

534

elif isancestor(minor_tt, major_tt):

534

elif isancestor(minor_tt, major_tt):

535

if changes is not None and dest in changes.merged:

535

if changes is not None and dest in changes.merged:

536

# change to dest happened on the branch without copy-source change,

536

# change to dest happened on the branch without copy-source change,

537

# so both source are valid and "major" wins.

537

# so both source are valid and "major" wins.

538

return PICK_MAJOR, True

538

return PICK_MAJOR, True

539

else:

539

else:

540

return PICK_MAJOR, False

540

return PICK_MAJOR, False

541

elif isancestor(major_tt, minor_tt):

541

elif isancestor(major_tt, minor_tt):

542

if changes is not None and dest in changes.merged:

542

if changes is not None and dest in changes.merged:

543

# change to dest happened on the branch without copy-source change,

543

# change to dest happened on the branch without copy-source change,

544

# so both source are valid and "major" wins.

544

# so both source are valid and "major" wins.

545

return PICK_MAJOR, True

545

return PICK_MAJOR, True

546

else:

546

else:

547

return PICK_MINOR, False

547

return PICK_MINOR, False

548

elif minor_value is None:

548

elif minor_value is None:

549

# in case of conflict, the "alive" side wins.

549

# in case of conflict, the "alive" side wins.

550

return PICK_MAJOR, True

550

return PICK_MAJOR, True

551

elif major_value is None:

551

elif major_value is None:

552

# in case of conflict, the "alive" side wins.

552

# in case of conflict, the "alive" side wins.

553

return PICK_MINOR, True

553

return PICK_MINOR, True

554

else:

554

else:

555

# in case of conflict where both side are alive, major wins.

555

# in case of conflict where both side are alive, major wins.

556

return PICK_MAJOR, True

556

return PICK_MAJOR, True

557

558

559

def _revinfo_getter_extra(repo):

559

def _revinfo_getter_extra(repo):

560

"""return a function that return multiple data given a <rev>"i

560

"""return a function that return multiple data given a <rev>"i

561

562

* p1: revision number of first parent

562

* p1: revision number of first parent

563

* p2: revision number of first parent

563

* p2: revision number of first parent

564

* p1copies: mapping of copies from p1

564

* p1copies: mapping of copies from p1

565

* p2copies: mapping of copies from p2

565

* p2copies: mapping of copies from p2

566

* removed: a list of removed files

566

* removed: a list of removed files

567

* ismerged: a callback to know if file was merged in that revision

567

* ismerged: a callback to know if file was merged in that revision

568

"""

568

"""

569

cl = repo.changelog

569

cl = repo.changelog

570

parents = cl.parentrevs

570

parents = cl.parentrevs

571

572

def get_ismerged(rev):

572

def get_ismerged(rev):

573

ctx = repo[rev]

573

ctx = repo[rev]

574

575

def ismerged(path):

575

def ismerged(path):

576

if path not in ctx.files():

576

if path not in ctx.files():

577

return False

577

return False

578

fctx = ctx[path]

578

fctx = ctx[path]

579

parents = fctx._filelog.parents(fctx._filenode)

579

parents = fctx._filelog.parents(fctx._filenode)

580

nb_parents = 0

580

nb_parents = 0

581

for n in parents:

581

for n in parents:

582

if n != nullid:

582

if n != nullid:

583

nb_parents += 1

583

nb_parents += 1

584

return nb_parents >= 2

584

return nb_parents >= 2

585

586

return ismerged

586

return ismerged

587

588

def revinfo(rev):

588

def revinfo(rev):

589

p1, p2 = parents(rev)

589

p1, p2 = parents(rev)

590

ctx = repo[rev]

590

ctx = repo[rev]

591

p1copies, p2copies = ctx._copies

591

p1copies, p2copies = ctx._copies

592

removed = ctx.filesremoved()

592

removed = ctx.filesremoved()

593

return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

593

return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

594

595

return revinfo

595

return revinfo

596

597

598

def _combine_changeset_copies_extra(

598

def _combine_changeset_copies_extra(

599

revs, children, targetrev, revinfo, match, isancestor

599

revs, children, targetrev, revinfo, match, isancestor

600

):

600

):

601

"""version of `_combine_changeset_copies` that works with the Google

601

"""version of `_combine_changeset_copies` that works with the Google

602

specific "extra" based storage for copy information"""

602

specific "extra" based storage for copy information"""

603

all_copies = {}

603

all_copies = {}

604

alwaysmatch = match.always()

604

alwaysmatch = match.always()

605

for r in revs:

605

for r in revs:

606

copies = all_copies.pop(r, None)

606

copies = all_copies.pop(r, None)

607

if copies is None:

607

if copies is None:

608

# this is a root

608

# this is a root

609

copies = {}

609

copies = {}

610

for i, c in enumerate(children[r]):

610

for i, c in enumerate(children[r]):

611

p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)

611

p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)

612

if r == p1:

612

if r == p1:

613

parent = 1

613

parent = 1

614

childcopies = p1copies

614

childcopies = p1copies

615

else:

615

else:

616

assert r == p2

616

assert r == p2

617

parent = 2

617

parent = 2

618

childcopies = p2copies

618

childcopies = p2copies

619

if not alwaysmatch:

619

if not alwaysmatch:

620

childcopies = {

620

childcopies = {

621

dst: src for dst, src in childcopies.items() if match(dst)

621

dst: src for dst, src in childcopies.items() if match(dst)

622

}

622

}

623

newcopies = copies

623

newcopies = copies

624

if childcopies:

624

if childcopies:

625

newcopies = copies.copy()

625

newcopies = copies.copy()

626

for dest, source in pycompat.iteritems(childcopies):

626

for dest, source in pycompat.iteritems(childcopies):

627

prev = copies.get(source)

627

prev = copies.get(source)

628

if prev is not None and prev[1] is not None:

628

if prev is not None and prev[1] is not None:

629

source = prev[1]

629

source = prev[1]

630

newcopies[dest] = (c, source)

630

newcopies[dest] = (c, source)

631

assert newcopies is not copies

631

assert newcopies is not copies

632

for f in removed:

632

for f in removed:

633

if f in newcopies:

633

if f in newcopies:

634

if newcopies is copies:

634

if newcopies is copies:

635

# copy on write to avoid affecting potential other

635

# copy on write to avoid affecting potential other

636

# branches. when there are no other branches, this

636

# branches. when there are no other branches, this

637

# could be avoided.

637

# could be avoided.

638

newcopies = copies.copy()

638

newcopies = copies.copy()

639

newcopies[f] = (c, None)

639

newcopies[f] = (c, None)

640

othercopies = all_copies.get(c)

640

othercopies = all_copies.get(c)

641

if othercopies is None:

641

if othercopies is None:

642

all_copies[c] = newcopies

642

all_copies[c] = newcopies

643

else:

643

else:

644

# we are the second parent to work on c, we need to merge our

644

# we are the second parent to work on c, we need to merge our

645

# work with the other.

645

# work with the other.

646

#

646

#

647

# In case of conflict, parent 1 take precedence over parent 2.

647

# In case of conflict, parent 1 take precedence over parent 2.

648

# This is an arbitrary choice made anew when implementing

648

# This is an arbitrary choice made anew when implementing

649

# changeset based copies. It was made without regards with

649

# changeset based copies. It was made without regards with

650

# potential filelog related behavior.

650

# potential filelog related behavior.

651

if parent == 1:

651

if parent == 1:

652

_merge_copies_dict_extra(

652

_merge_copies_dict_extra(

653

othercopies, newcopies, isancestor, ismerged

653

othercopies, newcopies, isancestor, ismerged

654

)

654

)

655

else:

655

else:

656

_merge_copies_dict_extra(

656

_merge_copies_dict_extra(

657

newcopies, othercopies, isancestor, ismerged

657

newcopies, othercopies, isancestor, ismerged

658

)

658

)

659

all_copies[c] = newcopies

659

all_copies[c] = newcopies

660

661

final_copies = {}

661

final_copies = {}

662

for dest, (tt, source) in all_copies[targetrev].items():

662

for dest, (tt, source) in all_copies[targetrev].items():

663

if source is not None:

663

if source is not None:

664

final_copies[dest] = source

664

final_copies[dest] = source

665

return final_copies

665

return final_copies

666

667

668

def _merge_copies_dict_extra(minor, major, isancestor, ismerged):

668

def _merge_copies_dict_extra(minor, major, isancestor, ismerged):

669

"""version of `_merge_copies_dict` that works with the Google

669

"""version of `_merge_copies_dict` that works with the Google

670

specific "extra" based storage for copy information"""

670

specific "extra" based storage for copy information"""

671

for dest, value in major.items():

671

for dest, value in major.items():

672

other = minor.get(dest)

672

other = minor.get(dest)

673

if other is None:

673

if other is None:

674

minor[dest] = value

674

minor[dest] = value

675

else:

675

else:

676

new_tt = value[0]

676

new_tt = value[0]

677

other_tt = other[0]

677

other_tt = other[0]

678

if value[1] == other[1]:

678

if value[1] == other[1]:

679

continue

679

continue

680

# content from "major" wins, unless it is older

680

# content from "major" wins, unless it is older

681

# than the branch point or there is a merge

681

# than the branch point or there is a merge

682

if (

682

if (

683

new_tt == other_tt

683

new_tt == other_tt

684

or not isancestor(new_tt, other_tt)

684

or not isancestor(new_tt, other_tt)

685

or ismerged(dest)

685

or ismerged(dest)

686

):

686

):

687

minor[dest] = value

687

minor[dest] = value

688

689

690

def _forwardcopies(a, b, base=None, match=None):

690

def _forwardcopies(a, b, base=None, match=None):

691

"""find {dst@b: src@a} copy mapping where a is an ancestor of b"""

691

"""find {dst@b: src@a} copy mapping where a is an ancestor of b"""

692

693

if base is None:

693

if base is None:

694

base = a

694

base = a

695

match = a.repo().narrowmatch(match)

695

match = a.repo().narrowmatch(match)

696

# check for working copy

696

# check for working copy

697

if b.rev() is None:

697

if b.rev() is None:

698

cm = _committedforwardcopies(a, b.p1(), base, match)

698

cm = _committedforwardcopies(a, b.p1(), base, match)

699

# combine copies from dirstate if necessary

699

# combine copies from dirstate if necessary

700

copies = _chain(cm, _dirstatecopies(b._repo, match))

700

copies = _chain(cm, _dirstatecopies(b._repo, match))

701

else:

701

else:

702

copies = _committedforwardcopies(a, b, base, match)

702

copies = _committedforwardcopies(a, b, base, match)

703

return copies

703

return copies

704

705

706

def _backwardrenames(a, b, match):

706

def _backwardrenames(a, b, match):

707

"""find renames from a to b"""

707

"""find renames from a to b"""

708

if a._repo.ui.config(b'experimental', b'copytrace') == b'off':

708

if a._repo.ui.config(b'experimental', b'copytrace') == b'off':

709

return {}

709

return {}

710

711

# We don't want to pass in "match" here, since that would filter

711

# We don't want to pass in "match" here, since that would filter

712

# the destination by it. Since we're reversing the copies, we want

712

# the destination by it. Since we're reversing the copies, we want

713

# to filter the source instead.

713

# to filter the source instead.

714

copies = _forwardcopies(b, a)

714

copies = _forwardcopies(b, a)

715

return _reverse_renames(copies, a, match)

715

return _reverse_renames(copies, a, match)

716

717

718

def _reverse_renames(copies, dst, match):

718

def _reverse_renames(copies, dst, match):

719

"""given copies to context 'dst', finds renames from that context"""

719

"""given copies to context 'dst', finds renames from that context"""

720

# Even though we're not taking copies into account, 1:n rename situations

720

# Even though we're not taking copies into account, 1:n rename situations

721

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

721

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

722

# arbitrarily pick one of the renames.

722

# arbitrarily pick one of the renames.

723

r = {}

723

r = {}

724

for k, v in sorted(pycompat.iteritems(copies)):

724

for k, v in sorted(pycompat.iteritems(copies)):

725

if match and not match(v):

725

if match and not match(v):

726

continue

726

continue

727

# remove copies

727

# remove copies

728

if v in dst:

728

if v in dst:

729

continue

729

continue

730

r[v] = k

730

r[v] = k

731

return r

731

return r

732

733

734

def pathcopies(x, y, match=None):

734

def pathcopies(x, y, match=None):

735

"""find {dst@y: src@x} copy mapping for directed compare"""

735

"""find {dst@y: src@x} copy mapping for directed compare"""

736

repo = x._repo

736

repo = x._repo

737

debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

737

debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

738

if debug:

738

if debug:

739

repo.ui.debug(

739

repo.ui.debug(

740

b'debug.copies: searching copies from %s to %s\n' % (x, y)

740

b'debug.copies: searching copies from %s to %s\n' % (x, y)

741

)

741

)

742

if x == y or not x or not y:

742

if x == y or not x or not y:

743

return {}

743

return {}

744

if y.rev() is None and x == y.p1():

744

if y.rev() is None and x == y.p1():

745

if debug:

745

if debug:

746

repo.ui.debug(b'debug.copies: search mode: dirstate\n')

746

repo.ui.debug(b'debug.copies: search mode: dirstate\n')

747

# short-circuit to avoid issues with merge states

747

# short-circuit to avoid issues with merge states

748

return _dirstatecopies(repo, match)

748

return _dirstatecopies(repo, match)

749

a = y.ancestor(x)

749

a = y.ancestor(x)

750

if a == x:

750

if a == x:

751

if debug:

751

if debug:

752

repo.ui.debug(b'debug.copies: search mode: forward\n')

752

repo.ui.debug(b'debug.copies: search mode: forward\n')

753

copies = _forwardcopies(x, y, match=match)

753

copies = _forwardcopies(x, y, match=match)

754

elif a == y:

754

elif a == y:

755

if debug:

755

if debug:

756

repo.ui.debug(b'debug.copies: search mode: backward\n')

756

repo.ui.debug(b'debug.copies: search mode: backward\n')

757

copies = _backwardrenames(x, y, match=match)

757

copies = _backwardrenames(x, y, match=match)

758

else:

758

else:

759

if debug:

759

if debug:

760

repo.ui.debug(b'debug.copies: search mode: combined\n')

760

repo.ui.debug(b'debug.copies: search mode: combined\n')

761

base = None

761

base = None

762

if a.rev() != nullrev:

762

if a.rev() != nullrev:

763

base = x

763

base = x

764

x_copies = _forwardcopies(a, x)

765

y_copies = _forwardcopies(a, y, base, match=match)

766

x_backward_renames = _reverse_renames(x_copies, x, match)

764

copies = _chain(

767

copies = _chain(

765

_backwardrenames(x, a, ~~match~~=~~match~~),

768

x_backward_renames,

766

_forwardcopies(a, y, base, match=match),

769

y_copies,

767

)

770

)

768

_filter(x, y, copies)

771

_filter(x, y, copies)

769

return copies

772

return copies

770

773

771

774

772

def mergecopies(repo, c1, c2, base):

775

def mergecopies(repo, c1, c2, base):

773

"""

776

"""

774

Finds moves and copies between context c1 and c2 that are relevant for

777

Finds moves and copies between context c1 and c2 that are relevant for

775

merging. 'base' will be used as the merge base.

778

merging. 'base' will be used as the merge base.

776

779

777

Copytracing is used in commands like rebase, merge, unshelve, etc to merge

780

Copytracing is used in commands like rebase, merge, unshelve, etc to merge

778

files that were moved/ copied in one merge parent and modified in another.

781

files that were moved/ copied in one merge parent and modified in another.

779

For example:

782

For example:

780

783

781

o ---> 4 another commit

784

o ---> 4 another commit

782

|

785

|

783

| o ---> 3 commit that modifies a.txt

786

| o ---> 3 commit that modifies a.txt

784

| /

787

| /

785

o / ---> 2 commit that moves a.txt to b.txt

788

o / ---> 2 commit that moves a.txt to b.txt

786

|/

789

|/

787

o ---> 1 merge base

790

o ---> 1 merge base

788

791

789

If we try to rebase revision 3 on revision 4, since there is no a.txt in

792

If we try to rebase revision 3 on revision 4, since there is no a.txt in

790

revision 4, and if user have copytrace disabled, we prints the following

793

revision 4, and if user have copytrace disabled, we prints the following

791

message:

794

message:

792

795

793

```other changed <file> which local deleted```

796

```other changed <file> which local deleted```

794

797

795

Returns a tuple where:

798

Returns a tuple where:

796

799

797

"branch_copies" an instance of branch_copies.

800

"branch_copies" an instance of branch_copies.

798

801

799

"diverge" is a mapping of source name -> list of destination names

802

"diverge" is a mapping of source name -> list of destination names

800

for divergent renames.

803

for divergent renames.

801

804

802

This function calls different copytracing algorithms based on config.

805

This function calls different copytracing algorithms based on config.

803

"""

806

"""

804

# avoid silly behavior for update from empty dir

807

# avoid silly behavior for update from empty dir

805

if not c1 or not c2 or c1 == c2:

808

if not c1 or not c2 or c1 == c2:

806

return branch_copies(), branch_copies(), {}

809

return branch_copies(), branch_copies(), {}

807

810

808

narrowmatch = c1.repo().narrowmatch()

811

narrowmatch = c1.repo().narrowmatch()

809

812

810

# avoid silly behavior for parent -> working dir

813

# avoid silly behavior for parent -> working dir

811

if c2.node() is None and c1.node() == repo.dirstate.p1():

814

if c2.node() is None and c1.node() == repo.dirstate.p1():

812

return (

815

return (

813

branch_copies(_dirstatecopies(repo, narrowmatch)),

816

branch_copies(_dirstatecopies(repo, narrowmatch)),

814

branch_copies(),

817

branch_copies(),

815

{},

818

{},

816

)

819

)

817

820

818

copytracing = repo.ui.config(b'experimental', b'copytrace')

821

copytracing = repo.ui.config(b'experimental', b'copytrace')

819

if stringutil.parsebool(copytracing) is False:

822

if stringutil.parsebool(copytracing) is False:

820

# stringutil.parsebool() returns None when it is unable to parse the

823

# stringutil.parsebool() returns None when it is unable to parse the

821

# value, so we should rely on making sure copytracing is on such cases

824

# value, so we should rely on making sure copytracing is on such cases

822

return branch_copies(), branch_copies(), {}

825

return branch_copies(), branch_copies(), {}

823

826

824

if usechangesetcentricalgo(repo):

827

if usechangesetcentricalgo(repo):

825

# The heuristics don't make sense when we need changeset-centric algos

828

# The heuristics don't make sense when we need changeset-centric algos

826

return _fullcopytracing(repo, c1, c2, base)

829

return _fullcopytracing(repo, c1, c2, base)

827

830

828

# Copy trace disabling is explicitly below the node == p1 logic above

831

# Copy trace disabling is explicitly below the node == p1 logic above

829

# because the logic above is required for a simple copy to be kept across a

832

# because the logic above is required for a simple copy to be kept across a

830

# rebase.

833

# rebase.

831

if copytracing == b'heuristics':

834

if copytracing == b'heuristics':

832

# Do full copytracing if only non-public revisions are involved as

835

# Do full copytracing if only non-public revisions are involved as

833

# that will be fast enough and will also cover the copies which could

836

# that will be fast enough and will also cover the copies which could

834

# be missed by heuristics

837

# be missed by heuristics

835

if _isfullcopytraceable(repo, c1, base):

838

if _isfullcopytraceable(repo, c1, base):

836

return _fullcopytracing(repo, c1, c2, base)

839

return _fullcopytracing(repo, c1, c2, base)

837

return _heuristicscopytracing(repo, c1, c2, base)

840

return _heuristicscopytracing(repo, c1, c2, base)

838

else:

841

else:

839

return _fullcopytracing(repo, c1, c2, base)

842

return _fullcopytracing(repo, c1, c2, base)

840

843

841

844

842

def _isfullcopytraceable(repo, c1, base):

845

def _isfullcopytraceable(repo, c1, base):

843

"""Checks that if base, source and destination are all no-public branches,

846

"""Checks that if base, source and destination are all no-public branches,

844

if yes let's use the full copytrace algorithm for increased capabilities

847

if yes let's use the full copytrace algorithm for increased capabilities

845

since it will be fast enough.

848

since it will be fast enough.

846

849

847

`experimental.copytrace.sourcecommitlimit` can be used to set a limit for

850

`experimental.copytrace.sourcecommitlimit` can be used to set a limit for

848

number of changesets from c1 to base such that if number of changesets are

851

number of changesets from c1 to base such that if number of changesets are

849

more than the limit, full copytracing algorithm won't be used.

852

more than the limit, full copytracing algorithm won't be used.

850

"""

853

"""

851

if c1.rev() is None:

854

if c1.rev() is None:

852

c1 = c1.p1()

855

c1 = c1.p1()

853

if c1.mutable() and base.mutable():

856

if c1.mutable() and base.mutable():

854

sourcecommitlimit = repo.ui.configint(

857

sourcecommitlimit = repo.ui.configint(

855

b'experimental', b'copytrace.sourcecommitlimit'

858

b'experimental', b'copytrace.sourcecommitlimit'

856

)

859

)

857

commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))

860

commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))

858

return commits < sourcecommitlimit

861

return commits < sourcecommitlimit

859

return False

862

return False

860

863

861

864

862

def _checksinglesidecopies(

865

def _checksinglesidecopies(

863

src, dsts1, m1, m2, mb, c2, base, copy, renamedelete

866

src, dsts1, m1, m2, mb, c2, base, copy, renamedelete

864

):

867

):

865

if src not in m2:

868

if src not in m2:

866

# deleted on side 2

869

# deleted on side 2

867

if src not in m1:

870

if src not in m1:

868

# renamed on side 1, deleted on side 2

871

# renamed on side 1, deleted on side 2

869

renamedelete[src] = dsts1

872

renamedelete[src] = dsts1

870

elif src not in mb:

873

elif src not in mb:

871

# Work around the "short-circuit to avoid issues with merge states"

874

# Work around the "short-circuit to avoid issues with merge states"

872

# thing in pathcopies(): pathcopies(x, y) can return a copy where the

875

# thing in pathcopies(): pathcopies(x, y) can return a copy where the

873

# destination doesn't exist in y.

876

# destination doesn't exist in y.

874

pass

877

pass

875

elif mb[src] != m2[src] and not _related(c2[src], base[src]):

878

elif mb[src] != m2[src] and not _related(c2[src], base[src]):

876

return

879

return

877

elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):

880

elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):

878

# modified on side 2

881

# modified on side 2

879

for dst in dsts1:

882

for dst in dsts1:

880

copy[dst] = src

883

copy[dst] = src

881

884

882

885

883

class branch_copies(object):

886

class branch_copies(object):

884

"""Information about copies made on one side of a merge/graft.

887

"""Information about copies made on one side of a merge/graft.

885

888

886

"copy" is a mapping from destination name -> source name,

889

"copy" is a mapping from destination name -> source name,

887

where source is in c1 and destination is in c2 or vice-versa.

890

where source is in c1 and destination is in c2 or vice-versa.

888

891

889

"movewithdir" is a mapping from source name -> destination name,

892

"movewithdir" is a mapping from source name -> destination name,

890

where the file at source present in one context but not the other

893

where the file at source present in one context but not the other

891

needs to be moved to destination by the merge process, because the

894

needs to be moved to destination by the merge process, because the

892

other context moved the directory it is in.

895

other context moved the directory it is in.

893

896

894

"renamedelete" is a mapping of source name -> list of destination

897

"renamedelete" is a mapping of source name -> list of destination

895

names for files deleted in c1 that were renamed in c2 or vice-versa.

898

names for files deleted in c1 that were renamed in c2 or vice-versa.

896

899

897

"dirmove" is a mapping of detected source dir -> destination dir renames.

900

"dirmove" is a mapping of detected source dir -> destination dir renames.

898

This is needed for handling changes to new files previously grafted into

901

This is needed for handling changes to new files previously grafted into

899

renamed directories.

902

renamed directories.

900

"""

903

"""

901

904

902

def __init__(

905

def __init__(

903

self, copy=None, renamedelete=None, dirmove=None, movewithdir=None

906

self, copy=None, renamedelete=None, dirmove=None, movewithdir=None

904

):

907

):

905

self.copy = {} if copy is None else copy

908

self.copy = {} if copy is None else copy

906

self.renamedelete = {} if renamedelete is None else renamedelete

909

self.renamedelete = {} if renamedelete is None else renamedelete

907

self.dirmove = {} if dirmove is None else dirmove

910

self.dirmove = {} if dirmove is None else dirmove

908

self.movewithdir = {} if movewithdir is None else movewithdir

911

self.movewithdir = {} if movewithdir is None else movewithdir

909

912

910

def __repr__(self):

913

def __repr__(self):

911

return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (

914

return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (

912

self.copy,

915

self.copy,

913

self.renamedelete,

916

self.renamedelete,

914

self.dirmove,

917

self.dirmove,

915

self.movewithdir,

918

self.movewithdir,

916

)

919

)

917

920

918

921

919

def _fullcopytracing(repo, c1, c2, base):

922

def _fullcopytracing(repo, c1, c2, base):

920

"""The full copytracing algorithm which finds all the new files that were

923

"""The full copytracing algorithm which finds all the new files that were

921

added from merge base up to the top commit and for each file it checks if

924

added from merge base up to the top commit and for each file it checks if

922

this file was copied from another file.

925

this file was copied from another file.

923

926

924

This is pretty slow when a lot of changesets are involved but will track all

927

This is pretty slow when a lot of changesets are involved but will track all

925

the copies.

928

the copies.

926

"""

929

"""

927

m1 = c1.manifest()

930

m1 = c1.manifest()

928

m2 = c2.manifest()

931

m2 = c2.manifest()

929

mb = base.manifest()

932

mb = base.manifest()

930

933

931

copies1 = pathcopies(base, c1)

934

copies1 = pathcopies(base, c1)

932

copies2 = pathcopies(base, c2)

935

copies2 = pathcopies(base, c2)

933

936

934

if not (copies1 or copies2):

937

if not (copies1 or copies2):

935

return branch_copies(), branch_copies(), {}

938

return branch_copies(), branch_copies(), {}

936

939

937

inversecopies1 = {}

940

inversecopies1 = {}

938

inversecopies2 = {}

941

inversecopies2 = {}

939

for dst, src in copies1.items():

942

for dst, src in copies1.items():

940

inversecopies1.setdefault(src, []).append(dst)

943

inversecopies1.setdefault(src, []).append(dst)

941

for dst, src in copies2.items():

944

for dst, src in copies2.items():

942

inversecopies2.setdefault(src, []).append(dst)

945

inversecopies2.setdefault(src, []).append(dst)

943

946

944

copy1 = {}

947

copy1 = {}

945

copy2 = {}

948

copy2 = {}

946

diverge = {}

949

diverge = {}

947

renamedelete1 = {}

950

renamedelete1 = {}

948

renamedelete2 = {}

951

renamedelete2 = {}

949

allsources = set(inversecopies1) | set(inversecopies2)

952

allsources = set(inversecopies1) | set(inversecopies2)

950

for src in allsources:

953

for src in allsources:

951

dsts1 = inversecopies1.get(src)

954

dsts1 = inversecopies1.get(src)

952

dsts2 = inversecopies2.get(src)

955

dsts2 = inversecopies2.get(src)

953

if dsts1 and dsts2:

956

if dsts1 and dsts2:

954

# copied/renamed on both sides

957

# copied/renamed on both sides

955

if src not in m1 and src not in m2:

958

if src not in m1 and src not in m2:

956

# renamed on both sides

959

# renamed on both sides

957

dsts1 = set(dsts1)

960

dsts1 = set(dsts1)

958

dsts2 = set(dsts2)

961

dsts2 = set(dsts2)

959

# If there's some overlap in the rename destinations, we

962

# If there's some overlap in the rename destinations, we

960

# consider it not divergent. For example, if side 1 copies 'a'

963

# consider it not divergent. For example, if side 1 copies 'a'

961

# to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'

964

# to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'

962

# and 'd' and deletes 'a'.

965

# and 'd' and deletes 'a'.

963

if dsts1 & dsts2:

966

if dsts1 & dsts2:

964

for dst in dsts1 & dsts2:

967

for dst in dsts1 & dsts2:

965

copy1[dst] = src

968

copy1[dst] = src

966

copy2[dst] = src

969

copy2[dst] = src

967

else:

970

else:

968

diverge[src] = sorted(dsts1 | dsts2)

971

diverge[src] = sorted(dsts1 | dsts2)

969

elif src in m1 and src in m2:

972

elif src in m1 and src in m2:

970

# copied on both sides

973

# copied on both sides

971

dsts1 = set(dsts1)

974

dsts1 = set(dsts1)

972

dsts2 = set(dsts2)

975

dsts2 = set(dsts2)

973

for dst in dsts1 & dsts2:

976

for dst in dsts1 & dsts2:

974

copy1[dst] = src

977

copy1[dst] = src

975

copy2[dst] = src

978

copy2[dst] = src

976

# TODO: Handle cases where it was renamed on one side and copied

979

# TODO: Handle cases where it was renamed on one side and copied

977

# on the other side

980

# on the other side

978

elif dsts1:

981

elif dsts1:

979

# copied/renamed only on side 1

982

# copied/renamed only on side 1

980

_checksinglesidecopies(

983

_checksinglesidecopies(

981

src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1

984

src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1

982

)

985

)

983

elif dsts2:

986

elif dsts2:

984

# copied/renamed only on side 2

987

# copied/renamed only on side 2

985

_checksinglesidecopies(

988

_checksinglesidecopies(

986

src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2

989

src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2

987

)

990

)

988

991

989

# find interesting file sets from manifests

992

# find interesting file sets from manifests

990

cache = []

993

cache = []

991

994

992

def _get_addedfiles(idx):

995

def _get_addedfiles(idx):

993

if not cache:

996

if not cache:

994

addedinm1 = m1.filesnotin(mb, repo.narrowmatch())

997

addedinm1 = m1.filesnotin(mb, repo.narrowmatch())

995

addedinm2 = m2.filesnotin(mb, repo.narrowmatch())

998

addedinm2 = m2.filesnotin(mb, repo.narrowmatch())

996

u1 = sorted(addedinm1 - addedinm2)

999

u1 = sorted(addedinm1 - addedinm2)

997

u2 = sorted(addedinm2 - addedinm1)

1000

u2 = sorted(addedinm2 - addedinm1)

998

cache.extend((u1, u2))

1001

cache.extend((u1, u2))

999

return cache[idx]

1002

return cache[idx]

1000

1003

1001

u1fn = lambda: _get_addedfiles(0)

1004

u1fn = lambda: _get_addedfiles(0)

1002

u2fn = lambda: _get_addedfiles(1)

1005

u2fn = lambda: _get_addedfiles(1)

1003

if repo.ui.debugflag:

1006

if repo.ui.debugflag:

1004

u1 = u1fn()

1007

u1 = u1fn()

1005

u2 = u2fn()

1008

u2 = u2fn()

1006

1009

1007

header = b" unmatched files in %s"

1010

header = b" unmatched files in %s"

1008

if u1:

1011

if u1:

1009

repo.ui.debug(

1012

repo.ui.debug(

1010

b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))

1013

b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))

1011

)

1014

)

1012

if u2:

1015

if u2:

1013

repo.ui.debug(

1016

repo.ui.debug(

1014

b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))

1017

b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))

1015

)

1018

)

1016

1019

1017

renamedeleteset = set()

1020

renamedeleteset = set()

1018

divergeset = set()

1021

divergeset = set()

1019

for dsts in diverge.values():

1022

for dsts in diverge.values():

1020

divergeset.update(dsts)

1023

divergeset.update(dsts)

1021

for dsts in renamedelete1.values():

1024

for dsts in renamedelete1.values():

1022

renamedeleteset.update(dsts)

1025

renamedeleteset.update(dsts)

1023

for dsts in renamedelete2.values():

1026

for dsts in renamedelete2.values():

1024

renamedeleteset.update(dsts)

1027

renamedeleteset.update(dsts)

1025

1028

1026

repo.ui.debug(

1029

repo.ui.debug(

1027

b" all copies found (* = to merge, ! = divergent, "

1030

b" all copies found (* = to merge, ! = divergent, "

1028

b"% = renamed and deleted):\n"

1031

b"% = renamed and deleted):\n"

1029

)

1032

)

1030

for side, copies in ((b"local", copies1), (b"remote", copies2)):

1033

for side, copies in ((b"local", copies1), (b"remote", copies2)):

1031

if not copies:

1034

if not copies:

1032

continue

1035

continue

1033

repo.ui.debug(b" on %s side:\n" % side)

1036

repo.ui.debug(b" on %s side:\n" % side)

1034

for f in sorted(copies):

1037

for f in sorted(copies):

1035

note = b""

1038

note = b""

1036

if f in copy1 or f in copy2:

1039

if f in copy1 or f in copy2:

1037

note += b"*"

1040

note += b"*"

1038

if f in divergeset:

1041

if f in divergeset:

1039

note += b"!"

1042

note += b"!"

1040

if f in renamedeleteset:

1043

if f in renamedeleteset:

1041

note += b"%"

1044

note += b"%"

1042

repo.ui.debug(

1045

repo.ui.debug(

1043

b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)

1046

b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)

1044

)

1047

)

1045

del renamedeleteset

1048

del renamedeleteset

1046

del divergeset

1049

del divergeset

1047

1050

1048

repo.ui.debug(b" checking for directory renames\n")

1051

repo.ui.debug(b" checking for directory renames\n")

1049

1052

1050

dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)

1053

dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)

1051

dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

1054

dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

1052

1055

1053

branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)

1056

branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)

1054

branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

1057

branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

1055

1058

1056

return branch_copies1, branch_copies2, diverge

1059

return branch_copies1, branch_copies2, diverge

1057

1060

1058

1061

1059

def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):

1062

def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):

1060

"""Finds moved directories and files that should move with them.

1063

"""Finds moved directories and files that should move with them.

1061

1064

1062

ctx: the context for one of the sides

1065

ctx: the context for one of the sides

1063

copy: files copied on the same side (as ctx)

1066

copy: files copied on the same side (as ctx)

1064

fullcopy: files copied on the same side (as ctx), including those that

1067

fullcopy: files copied on the same side (as ctx), including those that

1065

merge.manifestmerge() won't care about

1068

merge.manifestmerge() won't care about

1066

addedfilesfn: function returning added files on the other side (compared to

1069

addedfilesfn: function returning added files on the other side (compared to

1067

ctx)

1070

ctx)

1068

"""

1071

"""

1069

# generate a directory move map

1072

# generate a directory move map

1070

invalid = set()

1073

invalid = set()

1071

dirmove = {}

1074

dirmove = {}

1072

1075

1073

# examine each file copy for a potential directory move, which is

1076

# examine each file copy for a potential directory move, which is

1074

# when all the files in a directory are moved to a new directory

1077

# when all the files in a directory are moved to a new directory

1075

for dst, src in pycompat.iteritems(fullcopy):

1078

for dst, src in pycompat.iteritems(fullcopy):

1076

dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)

1079

dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)

1077

if dsrc in invalid:

1080

if dsrc in invalid:

1078

# already seen to be uninteresting

1081

# already seen to be uninteresting

1079

continue

1082

continue

1080

elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):

1083

elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):

1081

# directory wasn't entirely moved locally

1084

# directory wasn't entirely moved locally

1082

invalid.add(dsrc)

1085

invalid.add(dsrc)

1083

elif dsrc in dirmove and dirmove[dsrc] != ddst:

1086

elif dsrc in dirmove and dirmove[dsrc] != ddst:

1084

# files from the same directory moved to two different places

1087

# files from the same directory moved to two different places

1085

invalid.add(dsrc)

1088

invalid.add(dsrc)

1086

else:

1089

else:

1087

# looks good so far

1090

# looks good so far

1088

dirmove[dsrc] = ddst

1091

dirmove[dsrc] = ddst

1089

1092

1090

for i in invalid:

1093

for i in invalid:

1091

if i in dirmove:

1094

if i in dirmove:

1092

del dirmove[i]

1095

del dirmove[i]

1093

del invalid

1096

del invalid

1094

1097

1095

if not dirmove:

1098

if not dirmove:

1096

return {}, {}

1099

return {}, {}

1097

1100

1098

dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

1101

dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

1099

1102

1100

for d in dirmove:

1103

for d in dirmove:

1101

repo.ui.debug(

1104

repo.ui.debug(

1102

b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])

1105

b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])

1103

)

1106

)

1104

1107

1105

# Sort the directories in reverse order, so we find children first

1108

# Sort the directories in reverse order, so we find children first

1106

# For example, if dir1/ was renamed to dir2/, and dir1/subdir1/

1109

# For example, if dir1/ was renamed to dir2/, and dir1/subdir1/

1107

# was renamed to dir2/subdir2/, we want to move dir1/subdir1/file

1110

# was renamed to dir2/subdir2/, we want to move dir1/subdir1/file

1108

# to dir2/subdir2/file (not dir2/subdir1/file)

1111

# to dir2/subdir2/file (not dir2/subdir1/file)

1109

dirmove_children_first = sorted(dirmove, reverse=True)

1112

dirmove_children_first = sorted(dirmove, reverse=True)

1110

1113

1111

movewithdir = {}

1114

movewithdir = {}

1112

# check unaccounted nonoverlapping files against directory moves

1115

# check unaccounted nonoverlapping files against directory moves

1113

for f in addedfilesfn():

1116

for f in addedfilesfn():

1114

if f not in fullcopy:

1117

if f not in fullcopy:

1115

for d in dirmove_children_first:

1118

for d in dirmove_children_first:

1116

if f.startswith(d):

1119

if f.startswith(d):

1117

# new file added in a directory that was moved, move it

1120

# new file added in a directory that was moved, move it

1118

df = dirmove[d] + f[len(d) :]

1121

df = dirmove[d] + f[len(d) :]

1119

if df not in copy:

1122

if df not in copy:

1120

movewithdir[f] = df

1123

movewithdir[f] = df

1121

repo.ui.debug(

1124

repo.ui.debug(

1122

b" pending file src: '%s' -> dst: '%s'\n"

1125

b" pending file src: '%s' -> dst: '%s'\n"

1123

% (f, df)

1126

% (f, df)

1124

)

1127

)

1125

break

1128

break

1126

1129

1127

return dirmove, movewithdir

1130

return dirmove, movewithdir

1128

1131

1129

1132

1130

def _heuristicscopytracing(repo, c1, c2, base):

1133

def _heuristicscopytracing(repo, c1, c2, base):

1131

"""Fast copytracing using filename heuristics

1134

"""Fast copytracing using filename heuristics

1132

1135

1133

Assumes that moves or renames are of following two types:

1136

Assumes that moves or renames are of following two types:

1134

1137

1135

1) Inside a directory only (same directory name but different filenames)

1138

1) Inside a directory only (same directory name but different filenames)

1136

2) Move from one directory to another

1139

2) Move from one directory to another

1137

(same filenames but different directory names)

1140

(same filenames but different directory names)

1138

1141

1139

Works only when there are no merge commits in the "source branch".

1142

Works only when there are no merge commits in the "source branch".

1140

Source branch is commits from base up to c2 not including base.

1143

Source branch is commits from base up to c2 not including base.

1141

1144

1142

If merge is involved it fallbacks to _fullcopytracing().

1145

If merge is involved it fallbacks to _fullcopytracing().

1143

1146

1144

Can be used by setting the following config:

1147

Can be used by setting the following config:

1145

1148

1146

[experimental]

1149

[experimental]

1147

copytrace = heuristics

1150

copytrace = heuristics

1148

1151

1149

In some cases the copy/move candidates found by heuristics can be very large

1152

In some cases the copy/move candidates found by heuristics can be very large

1150

in number and that will make the algorithm slow. The number of possible

1153

in number and that will make the algorithm slow. The number of possible

1151

candidates to check can be limited by using the config

1154

candidates to check can be limited by using the config

1152

`experimental.copytrace.movecandidateslimit` which defaults to 100.

1155

`experimental.copytrace.movecandidateslimit` which defaults to 100.

1153

"""

1156

"""

1154

1157

1155

if c1.rev() is None:

1158

if c1.rev() is None:

1156

c1 = c1.p1()

1159

c1 = c1.p1()

1157

if c2.rev() is None:

1160

if c2.rev() is None:

1158

c2 = c2.p1()

1161

c2 = c2.p1()

1159

1162

1160

changedfiles = set()

1163

changedfiles = set()

1161

m1 = c1.manifest()

1164

m1 = c1.manifest()

1162

if not repo.revs(b'%d::%d', base.rev(), c2.rev()):

1165

if not repo.revs(b'%d::%d', base.rev(), c2.rev()):

1163

# If base is not in c2 branch, we switch to fullcopytracing

1166

# If base is not in c2 branch, we switch to fullcopytracing

1164

repo.ui.debug(

1167

repo.ui.debug(

1165

b"switching to full copytracing as base is not "

1168

b"switching to full copytracing as base is not "

1166

b"an ancestor of c2\n"

1169

b"an ancestor of c2\n"

1167

)

1170

)

1168

return _fullcopytracing(repo, c1, c2, base)

1171

return _fullcopytracing(repo, c1, c2, base)

1169

1172

1170

ctx = c2

1173

ctx = c2

1171

while ctx != base:

1174

while ctx != base:

1172

if len(ctx.parents()) == 2:

1175

if len(ctx.parents()) == 2:

1173

# To keep things simple let's not handle merges

1176

# To keep things simple let's not handle merges

1174

repo.ui.debug(b"switching to full copytracing because of merges\n")

1177

repo.ui.debug(b"switching to full copytracing because of merges\n")

1175

return _fullcopytracing(repo, c1, c2, base)

1178

return _fullcopytracing(repo, c1, c2, base)

1176

changedfiles.update(ctx.files())

1179

changedfiles.update(ctx.files())

1177

ctx = ctx.p1()

1180

ctx = ctx.p1()

1178

1181

1179

copies2 = {}

1182

copies2 = {}

1180

cp = _forwardcopies(base, c2)

1183

cp = _forwardcopies(base, c2)

1181

for dst, src in pycompat.iteritems(cp):

1184

for dst, src in pycompat.iteritems(cp):

1182

if src in m1:

1185

if src in m1:

1183

copies2[dst] = src

1186

copies2[dst] = src

1184

1187

1185

# file is missing if it isn't present in the destination, but is present in

1188

# file is missing if it isn't present in the destination, but is present in

1186

# the base and present in the source.

1189

# the base and present in the source.

1187

# Presence in the base is important to exclude added files, presence in the

1190

# Presence in the base is important to exclude added files, presence in the

1188

# source is important to exclude removed files.

1191

# source is important to exclude removed files.

1189

filt = lambda f: f not in m1 and f in base and f in c2

1192

filt = lambda f: f not in m1 and f in base and f in c2

1190

missingfiles = [f for f in changedfiles if filt(f)]

1193

missingfiles = [f for f in changedfiles if filt(f)]

1191

1194

1192

copies1 = {}

1195

copies1 = {}

1193

if missingfiles:

1196

if missingfiles:

1194

basenametofilename = collections.defaultdict(list)

1197

basenametofilename = collections.defaultdict(list)

1195

dirnametofilename = collections.defaultdict(list)

1198

dirnametofilename = collections.defaultdict(list)

1196

1199

1197

for f in m1.filesnotin(base.manifest()):

1200

for f in m1.filesnotin(base.manifest()):

1198

basename = os.path.basename(f)

1201

basename = os.path.basename(f)

1199

dirname = os.path.dirname(f)

1202

dirname = os.path.dirname(f)

1200

basenametofilename[basename].append(f)

1203

basenametofilename[basename].append(f)

1201

dirnametofilename[dirname].append(f)

1204

dirnametofilename[dirname].append(f)

1202

1205

1203

for f in missingfiles:

1206

for f in missingfiles:

1204

basename = os.path.basename(f)

1207

basename = os.path.basename(f)

1205

dirname = os.path.dirname(f)

1208

dirname = os.path.dirname(f)

1206

samebasename = basenametofilename[basename]

1209

samebasename = basenametofilename[basename]

1207

samedirname = dirnametofilename[dirname]

1210

samedirname = dirnametofilename[dirname]

1208

movecandidates = samebasename + samedirname

1211

movecandidates = samebasename + samedirname

1209

# f is guaranteed to be present in c2, that's why

1212

# f is guaranteed to be present in c2, that's why

1210

# c2.filectx(f) won't fail

1213

# c2.filectx(f) won't fail

1211

f2 = c2.filectx(f)

1214

f2 = c2.filectx(f)

1212

# we can have a lot of candidates which can slow down the heuristics

1215

# we can have a lot of candidates which can slow down the heuristics

1213

# config value to limit the number of candidates moves to check

1216

# config value to limit the number of candidates moves to check

1214

maxcandidates = repo.ui.configint(

1217

maxcandidates = repo.ui.configint(

1215

b'experimental', b'copytrace.movecandidateslimit'

1218

b'experimental', b'copytrace.movecandidateslimit'

1216

)

1219

)

1217

1220

1218

if len(movecandidates) > maxcandidates:

1221

if len(movecandidates) > maxcandidates:

1219

repo.ui.status(

1222

repo.ui.status(

1220

_(

1223

_(

1221

b"skipping copytracing for '%s', more "

1224

b"skipping copytracing for '%s', more "

1222

b"candidates than the limit: %d\n"

1225

b"candidates than the limit: %d\n"

1223

)

1226

)

1224

% (f, len(movecandidates))

1227

% (f, len(movecandidates))

1225

)

1228

)

1226

continue

1229

continue

1227

1230

1228

for candidate in movecandidates:

1231

for candidate in movecandidates:

1229

f1 = c1.filectx(candidate)

1232

f1 = c1.filectx(candidate)

1230

if _related(f1, f2):

1233

if _related(f1, f2):

1231

# if there are a few related copies then we'll merge

1234

# if there are a few related copies then we'll merge

1232

# changes into all of them. This matches the behaviour

1235

# changes into all of them. This matches the behaviour

1233

# of upstream copytracing

1236

# of upstream copytracing

1234

copies1[candidate] = f

1237

copies1[candidate] = f

1235

1238

1236

return branch_copies(copies1), branch_copies(copies2), {}

1239

return branch_copies(copies1), branch_copies(copies2), {}

1237

1240

1238

1241

1239

def _related(f1, f2):

1242

def _related(f1, f2):

1240

"""return True if f1 and f2 filectx have a common ancestor

1243

"""return True if f1 and f2 filectx have a common ancestor

1241

1244

1242

Walk back to common ancestor to see if the two files originate

1245

Walk back to common ancestor to see if the two files originate

1243

from the same file. Since workingfilectx's rev() is None it messes

1246

from the same file. Since workingfilectx's rev() is None it messes

1244

up the integer comparison logic, hence the pre-step check for

1247

up the integer comparison logic, hence the pre-step check for

1245

None (f1 and f2 can only be workingfilectx's initially).

1248

None (f1 and f2 can only be workingfilectx's initially).

1246

"""

1249

"""

1247

1250

1248

if f1 == f2:

1251

if f1 == f2:

1249

return True # a match

1252

return True # a match

1250

1253

1251

g1, g2 = f1.ancestors(), f2.ancestors()

1254

g1, g2 = f1.ancestors(), f2.ancestors()

1252

try:

1255

try:

1253

f1r, f2r = f1.linkrev(), f2.linkrev()

1256

f1r, f2r = f1.linkrev(), f2.linkrev()

1254

1257

1255

if f1r is None:

1258

if f1r is None:

1256

f1 = next(g1)

1259

f1 = next(g1)

1257

if f2r is None:

1260

if f2r is None:

1258

f2 = next(g2)

1261

f2 = next(g2)

1259

1262

1260

while True:

1263

while True:

1261

f1r, f2r = f1.linkrev(), f2.linkrev()

1264

f1r, f2r = f1.linkrev(), f2.linkrev()

1262

if f1r > f2r:

1265

if f1r > f2r:

1263

f1 = next(g1)

1266

f1 = next(g1)

1264

elif f2r > f1r:

1267

elif f2r > f1r:

1265

f2 = next(g2)

1268

f2 = next(g2)

1266

else: # f1 and f2 point to files in the same linkrev

1269

else: # f1 and f2 point to files in the same linkrev

1267

return f1 == f2 # true if they point to the same file

1270

return f1 == f2 # true if they point to the same file

1268

except StopIteration:

1271

except StopIteration:

1269

return False

1272

return False

1270

1273

1271

1274

1272

def graftcopies(wctx, ctx, base):

1275

def graftcopies(wctx, ctx, base):

1273

"""reproduce copies between base and ctx in the wctx

1276

"""reproduce copies between base and ctx in the wctx

1274

1277

1275

Unlike mergecopies(), this function will only consider copies between base

1278

Unlike mergecopies(), this function will only consider copies between base

1276

and ctx; it will ignore copies between base and wctx. Also unlike

1279

and ctx; it will ignore copies between base and wctx. Also unlike

1277

mergecopies(), this function will apply copies to the working copy (instead

1280

mergecopies(), this function will apply copies to the working copy (instead

1278

of just returning information about the copies). That makes it cheaper

1281

of just returning information about the copies). That makes it cheaper

1279

(especially in the common case of base==ctx.p1()) and useful also when

1282

(especially in the common case of base==ctx.p1()) and useful also when

1280

experimental.copytrace=off.

1283

experimental.copytrace=off.

1281

1284

1282

merge.update() will have already marked most copies, but it will only

1285

merge.update() will have already marked most copies, but it will only

1283

mark copies if it thinks the source files are related (see

1286

mark copies if it thinks the source files are related (see

1284

merge._related()). It will also not mark copies if the file wasn't modified

1287

merge._related()). It will also not mark copies if the file wasn't modified

1285

on the local side. This function adds the copies that were "missed"

1288

on the local side. This function adds the copies that were "missed"

1286

by merge.update().

1289

by merge.update().

1287

"""

1290

"""

1288

new_copies = pathcopies(base, ctx)

1291

new_copies = pathcopies(base, ctx)

1289

parent = wctx.p1()

1292

parent = wctx.p1()

1290

_filter(parent, wctx, new_copies)

1293

_filter(parent, wctx, new_copies)

1291

# Extra filtering to drop copy information for files that existed before

1294

# Extra filtering to drop copy information for files that existed before

1292

# the graft. This is to handle the case of grafting a rename onto a commit

1295

# the graft. This is to handle the case of grafting a rename onto a commit

1293

# that already has the rename. Otherwise the presence of copy information

1296

# that already has the rename. Otherwise the presence of copy information

1294

# would result in the creation of an empty commit where we would prefer to

1297

# would result in the creation of an empty commit where we would prefer to

1295

# not create one.

1298

# not create one.

1296

for dest, __ in list(new_copies.items()):

1299

for dest, __ in list(new_copies.items()):

1297

if dest in parent:

1300

if dest in parent:

1298

del new_copies[dest]

1301

del new_copies[dest]

1299

for dst, src in pycompat.iteritems(new_copies):

1302

for dst, src in pycompat.iteritems(new_copies):

1300

wctx[dst].markcopied(src)

1303

wctx[dst].markcopied(src)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # coding: utf8
             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import collections
             import os
             from .i18n import _
             from .node import (
                 nullid,
                 nullrev,
             )
             from . import (
                 match as matchmod,
                 pathutil,
                 policy,
                 pycompat,
                 util,
             )
             from .utils import stringutil
             from .revlogutils import (
                 flagutil,
                 sidedata as sidedatamod,
             )
             rustmod = policy.importrust("copy_tracing")
             def _filter(src, dst, t):
                 """filters out invalid copies after chaining"""
                 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
                 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
                 # in the following table (not including trivial cases). For example, case 6
                 # is where a file existed in 'src' and remained under that name in 'mid' and
                 # then was renamed between 'mid' and 'dst'.
                 #
                 # case src mid dst result
                 #   1   x   y   -    -
                 #   2   x   y   y   x->y
                 #   3   x   y   x    -
                 #   4   x   y   z   x->z
                 #   5   -   x   y    -
                 #   6   x   x   y   x->y
                 #
                 # _chain() takes care of chaining the copies in 'a' and 'b', but it
                 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
                 # between 5 and 6, so it includes all cases in its result.
                 # Cases 1, 3, and 5 are then removed by _filter().
                 for k, v in list(t.items()):
                     if k == v:  # case 3
                         del t[k]
                     elif v not in src:  # case 5
                         # remove copies from files that didn't exist
                         del t[k]
                     elif k not in dst:  # case 1
                         # remove copies to files that were then removed
                         del t[k]
             def _chain(prefix, suffix):
                 """chain two sets of copies 'prefix' and 'suffix'"""
                 result = prefix.copy()
                 for key, value in pycompat.iteritems(suffix):
                     result[key] = prefix.get(value, value)
                 return result
             def _tracefile(fctx, am, basemf):
                 """return file context that is the ancestor of fctx present in ancestor
                 manifest am
                 Note: we used to try and stop after a given limit, however checking if that
                 limit is reached turned out to be very expensive. we are better off
                 disabling that feature."""
                 for f in fctx.ancestors():
                     path = f.path()
                     if am.get(path, None) == f.filenode():
                         return path
                     if basemf and basemf.get(path, None) == f.filenode():
                         return path
             def _dirstatecopies(repo, match=None):
                 ds = repo.dirstate
                 c = ds.copies().copy()
                 for k in list(c):
                     if ds[k] not in b'anm' or (match and not match(k)):
                         del c[k]
                 return c
             def _computeforwardmissing(a, b, match=None):
                 """Computes which files are in b but not a.
                 This is its own function so extensions can easily wrap this call to see what
                 files _forwardcopies is about to process.
                 """
                 ma = a.manifest()
                 mb = b.manifest()
                 return mb.filesnotin(ma, match=match)
             def usechangesetcentricalgo(repo):
                 """Checks if we should use changeset-centric copy algorithms"""
                 if repo.filecopiesmode == b'changeset-sidedata':
                     return True
                 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
                 changesetsource = (b'changeset-only', b'compatibility')
                 return readfrom in changesetsource
             def _committedforwardcopies(a, b, base, match):
                 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 repo = a._repo
                 if usechangesetcentricalgo(repo):
                     return _changesetforwardcopies(a, b, match)
                 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
                 dbg = repo.ui.debug
                 if debug:
                     dbg(b'debug.copies:    looking into rename from %s to %s\n' % (a, b))
                 am = a.manifest()
                 basemf = None if base is None else base.manifest()
                 # find where new files came from
                 # we currently don't try to find where old files went, too expensive
                 # this means we can miss a case like 'hg rm b; hg cp a b'
                 cm = {}
                 # Computing the forward missing is quite expensive on large manifests, since
                 # it compares the entire manifests. We can optimize it in the common use
                 # case of computing what copies are in a commit versus its parent (like
                 # during a rebase or histedit). Note, we exclude merge commits from this
                 # optimization, since the ctx.files() for a merge commit is not correct for
                 # this comparison.
                 forwardmissingmatch = match
                 if b.p1() == a and b.p2().node() == nullid:
                     filesmatcher = matchmod.exact(b.files())
                     forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
                 if repo.ui.configbool(b'devel', b'copy-tracing.trace-all-files'):
                     missing = list(b.walk(match))
                     # _computeforwardmissing(a, b, match=forwardmissingmatch)
                     if debug:
                         dbg(b'debug.copies:      searching all files: %d\n' % len(missing))
                 else:
                     missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
                     if debug:
                         dbg(
                             b'debug.copies:      missing files to search: %d\n'
                             % len(missing)
                         )
                 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
                 for f in sorted(missing):
                     if debug:
                         dbg(b'debug.copies:        tracing file: %s\n' % f)
                     fctx = b[f]
                     fctx._ancestrycontext = ancestrycontext
                     if debug:
                         start = util.timer()
                     opath = _tracefile(fctx, am, basemf)
                     if opath:
                         if debug:
                             dbg(b'debug.copies:          rename of: %s\n' % opath)
                         cm[f] = opath
                     if debug:
                         dbg(
                             b'debug.copies:          time: %f seconds\n'
                             % (util.timer() - start)
                         )
                 return cm
             def _revinfo_getter(repo, match):
                 """returns a function that returns the following data given a <rev>"
                 * p1: revision number of first parent
                 * p2: revision number of first parent
                 * changes: a ChangingFiles object
                 """
                 cl = repo.changelog
                 parents = cl.parentrevs
                 flags = cl.flags
                 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
                 changelogrevision = cl.changelogrevision
                 if rustmod is not None:
                     def revinfo(rev):
                         p1, p2 = parents(rev)
                         if flags(rev) & HASCOPIESINFO:
                             raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
                         else:
                             raw = None
                         return (p1, p2, raw)
                 else:
                     def revinfo(rev):
                         p1, p2 = parents(rev)
                         if flags(rev) & HASCOPIESINFO:
                             changes = changelogrevision(rev).changes
                         else:
                             changes = None
                         return (p1, p2, changes)
                 return revinfo
             def cached_is_ancestor(is_ancestor):
                 """return a cached version of is_ancestor"""
                 cache = {}
                 def _is_ancestor(anc, desc):
                     if anc > desc:
                         return False
                     elif anc == desc:
                         return True
                     key = (anc, desc)
                     ret = cache.get(key)
                     if ret is None:
                         ret = cache[key] = is_ancestor(anc, desc)
                     return ret
                 return _is_ancestor
             def _changesetforwardcopies(a, b, match):
                 if a.rev() in (nullrev, b.rev()):
                     return {}
                 repo = a.repo().unfiltered()
                 children = {}
                 cl = repo.changelog
                 isancestor = cl.isancestorrev
                 # To track rename from "A" to B, we need to gather all parent → children
                 # edges that are contains in `::B` but not in `::A`.
                 #
                 #
                 # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
                 # ::a`) and also all the "roots point", ie the parents of the exclusive set
                 # that belong to ::a. These are exactly all the revisions needed to express
                 # the parent → children we need to combine.
                 #
                 # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
                 # excluding paths that leads to roots that are not ancestors of `a`. We
                 # keep this out of the explanation because it is hard enough without this special case..
                 parents = cl._uncheckedparentrevs
                 graph_roots = (nullrev, nullrev)
                 ancestors = cl.ancestors([a.rev()], inclusive=True)
                 revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
                 roots = set()
                 has_graph_roots = False
                 multi_thread = repo.ui.configbool(b'devel', b'copy-tracing.multi-thread')
                 # iterate over `only(B, A)`
                 for r in revs:
                     ps = parents(r)
                     if ps == graph_roots:
                         has_graph_roots = True
                     else:
                         p1, p2 = ps
                         # find all the "root points" (see larger comment above)
                         if p1 != nullrev and p1 in ancestors:
                             roots.add(p1)
                         if p2 != nullrev and p2 in ancestors:
                             roots.add(p2)
                 if not roots:
                     # no common revision to track copies from
                     return {}
                 if has_graph_roots:
                     # this deal with the special case mentionned in the [1] footnotes. We
                     # must filter out revisions that leads to non-common graphroots.
                     roots = list(roots)
                     m = min(roots)
                     h = [b.rev()]
                     roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
                     roots_to_head = set(roots_to_head)
                     revs = [r for r in revs if r in roots_to_head]
                 if repo.filecopiesmode == b'changeset-sidedata':
                     # When using side-data, we will process the edges "from" the children.
                     # We iterate over the childre, gathering previous collected data for
                     # the parents. Do know when the parents data is no longer necessary, we
                     # keep a counter of how many children each revision has.
                     #
                     # An interresting property of `children_count` is that it only contains
                     # revision that will be relevant for a edge of the graph. So if a
                     # children has parent not in `children_count`, that edges should not be
                     # processed.
                     children_count = dict((r, 0) for r in roots)
                     for r in revs:
                         for p in cl.parentrevs(r):
                             if p == nullrev:
                                 continue
                             children_count[r] = 0
                             if p in children_count:
                                 children_count[p] += 1
                     revinfo = _revinfo_getter(repo, match)
                     return _combine_changeset_copies(
                         revs,
                         children_count,
                         b.rev(),
                         revinfo,
                         match,
                         isancestor,
                         multi_thread,
                     )
                 else:
                     # When not using side-data, we will process the edges "from" the parent.
                     # so we need a full mapping of the parent -> children relation.
                     children = dict((r, []) for r in roots)
                     for r in revs:
                         for p in cl.parentrevs(r):
                             if p == nullrev:
                                 continue
                             children[r] = []
                             if p in children:
                                 children[p].append(r)
                     x = revs.pop()
                     assert x == b.rev()
                     revs.extend(roots)
                     revs.sort()
                     revinfo = _revinfo_getter_extra(repo)
                     return _combine_changeset_copies_extra(
                         revs, children, b.rev(), revinfo, match, isancestor
                     )
             def _combine_changeset_copies(
                 revs, children_count, targetrev, revinfo, match, isancestor, multi_thread
             ):
                 """combine the copies information for each item of iterrevs
                 revs: sorted iterable of revision to visit
                 children_count: a {parent: <number-of-relevant-children>} mapping.
                 targetrev: the final copies destination revision (not in iterrevs)
                 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
                 match: a matcher
                 It returns the aggregated copies information for `targetrev`.
                 """
                 alwaysmatch = match.always()
                 if rustmod is not None:
                     final_copies = rustmod.combine_changeset_copies(
                         list(revs), children_count, targetrev, revinfo, multi_thread
                     )
                 else:
                     isancestor = cached_is_ancestor(isancestor)
                     all_copies = {}
                     # iterate over all the "children" side of copy tracing "edge"
                     for current_rev in revs:
                         p1, p2, changes = revinfo(current_rev)
                         current_copies = None
                         # iterate over all parents to chain the existing data with the
                         # data from the parent → child edge.
                         for parent, parent_rev in ((1, p1), (2, p2)):
                             if parent_rev == nullrev:
                                 continue
                             remaining_children = children_count.get(parent_rev)
                             if remaining_children is None:
                                 continue
                             remaining_children -= 1
                             children_count[parent_rev] = remaining_children
                             if remaining_children:
                                 copies = all_copies.get(parent_rev, None)
                             else:
                                 copies = all_copies.pop(parent_rev, None)
                             if copies is None:
                                 # this is a root
                                 newcopies = copies = {}
                             elif remaining_children:
                                 newcopies = copies.copy()
                             else:
                                 newcopies = copies
                             # chain the data in the edge with the existing data
                             if changes is not None:
                                 childcopies = {}
                                 if parent == 1:
                                     childcopies = changes.copied_from_p1
                                 elif parent == 2:
                                     childcopies = changes.copied_from_p2
                                 if childcopies:
                                     newcopies = copies.copy()
                                     for dest, source in pycompat.iteritems(childcopies):
                                         prev = copies.get(source)
                                         if prev is not None and prev[1] is not None:
                                             source = prev[1]
                                         newcopies[dest] = (current_rev, source)
                                     assert newcopies is not copies
                                 if changes.removed:
                                     for f in changes.removed:
                                         if f in newcopies:
                                             if newcopies is copies:
                                                 # copy on write to avoid affecting potential other
                                                 # branches.  when there are no other branches, this
                                                 # could be avoided.
                                                 newcopies = copies.copy()
                                             newcopies[f] = (current_rev, None)
                             # check potential need to combine the data from another parent (for
                             # that child). See comment below for details.
                             if current_copies is None:
                                 current_copies = newcopies
                             else:
                                 # we are the second parent to work on c, we need to merge our
                                 # work with the other.
                                 #
                                 # In case of conflict, parent 1 take precedence over parent 2.
                                 # This is an arbitrary choice made anew when implementing
                                 # changeset based copies. It was made without regards with
                                 # potential filelog related behavior.
                                 assert parent == 2
                                 current_copies = _merge_copies_dict(
                                     newcopies,
                                     current_copies,
                                     isancestor,
                                     changes,
                                     current_rev,
                                 )
                         all_copies[current_rev] = current_copies
                     # filter out internal details and return a {dest: source mapping}
                     final_copies = {}
                     for dest, (tt, source) in all_copies[targetrev].items():
                         if source is not None:
                             final_copies[dest] = source
                 if not alwaysmatch:
                     for filename in list(final_copies.keys()):
                         if not match(filename):
                             del final_copies[filename]
                 return final_copies
             # constant to decide which side to pick with _merge_copies_dict
             PICK_MINOR = 0
             PICK_MAJOR = 1
             PICK_EITHER = 2
             def _merge_copies_dict(minor, major, isancestor, changes, current_merge):
                 """merge two copies-mapping together, minor and major
                 In case of conflict, value from "major" will be picked.
                 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
                                                     ancestors of `high_rev`,
                 - `ismerged(path)`: callable return True if `path` have been merged in the
                                     current revision,
                 return the resulting dict (in practice, the "minor" object, updated)
                 """
                 for dest, value in major.items():
                     other = minor.get(dest)
                     if other is None:
                         minor[dest] = value
                     else:
                         pick, overwrite = _compare_values(
                             changes, isancestor, dest, other, value
                         )
                         if overwrite:
                             if pick == PICK_MAJOR:
                                 minor[dest] = (current_merge, value[1])
                             else:
                                 minor[dest] = (current_merge, other[1])
                         elif pick == PICK_MAJOR:
                             minor[dest] = value
                 return minor
             def _compare_values(changes, isancestor, dest, minor, major):
                 """compare two value within a _merge_copies_dict loop iteration
                 return (pick, overwrite).
                 - pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER
                 - overwrite is True if pick is a return of an ambiguity that needs resolution.
                 """
                 major_tt, major_value = major
                 minor_tt, minor_value = minor
                 if major_tt == minor_tt:
                     # if it comes from the same revision it must be the same value
                     assert major_value == minor_value
                     return PICK_EITHER, False
                 elif (
                     changes is not None
                     and minor_value is not None
                     and major_value is None
                     and dest in changes.salvaged
                 ):
                     # In this case, a deletion was reverted, the "alive" value overwrite
                     # the deleted one.
                     return PICK_MINOR, True
                 elif (
                     changes is not None
                     and major_value is not None
                     and minor_value is None
                     and dest in changes.salvaged
                 ):
                     # In this case, a deletion was reverted, the "alive" value overwrite
                     # the deleted one.
                     return PICK_MAJOR, True
                 elif isancestor(minor_tt, major_tt):
                     if changes is not None and dest in changes.merged:
                         # change to dest happened on the branch without copy-source change,
                         # so both source are valid and "major" wins.
                         return PICK_MAJOR, True
                     else:
                         return PICK_MAJOR, False
                 elif isancestor(major_tt, minor_tt):
                     if changes is not None and dest in changes.merged:
                         # change to dest happened on the branch without copy-source change,
                         # so both source are valid and "major" wins.
                         return PICK_MAJOR, True
                     else:
                         return PICK_MINOR, False
                 elif minor_value is None:
                     # in case of conflict, the "alive" side wins.
                     return PICK_MAJOR, True
                 elif major_value is None:
                     # in case of conflict, the "alive" side wins.
                     return PICK_MINOR, True
                 else:
                     # in case of conflict where both side are alive, major wins.
                     return PICK_MAJOR, True
             def _revinfo_getter_extra(repo):
                 """return a function that return multiple data given a <rev>"i
                 * p1: revision number of first parent
                 * p2: revision number of first parent
                 * p1copies: mapping of copies from p1
                 * p2copies: mapping of copies from p2
                 * removed: a list of removed files
                 * ismerged: a callback to know if file was merged in that revision
                 """
                 cl = repo.changelog
                 parents = cl.parentrevs
                 def get_ismerged(rev):
                     ctx = repo[rev]
                     def ismerged(path):
                         if path not in ctx.files():
                             return False
                         fctx = ctx[path]
                         parents = fctx._filelog.parents(fctx._filenode)
                         nb_parents = 0
                         for n in parents:
                             if n != nullid:
                                 nb_parents += 1
                         return nb_parents >= 2
                     return ismerged
                 def revinfo(rev):
                     p1, p2 = parents(rev)
                     ctx = repo[rev]
                     p1copies, p2copies = ctx._copies
                     removed = ctx.filesremoved()
                     return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
                 return revinfo
             def _combine_changeset_copies_extra(
                 revs, children, targetrev, revinfo, match, isancestor
             ):
                 """version of `_combine_changeset_copies` that works with the Google
                 specific "extra" based storage for copy information"""
                 all_copies = {}
                 alwaysmatch = match.always()
                 for r in revs:
                     copies = all_copies.pop(r, None)
                     if copies is None:
                         # this is a root
                         copies = {}
                     for i, c in enumerate(children[r]):
                         p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
                         if r == p1:
                             parent = 1
                             childcopies = p1copies
                         else:
                             assert r == p2
                             parent = 2
                             childcopies = p2copies
                         if not alwaysmatch:
                             childcopies = {
                                 dst: src for dst, src in childcopies.items() if match(dst)
                             }
                         newcopies = copies
                         if childcopies:
                             newcopies = copies.copy()
                             for dest, source in pycompat.iteritems(childcopies):
                                 prev = copies.get(source)
                                 if prev is not None and prev[1] is not None:
                                     source = prev[1]
                                 newcopies[dest] = (c, source)
                             assert newcopies is not copies
                         for f in removed:
                             if f in newcopies:
                                 if newcopies is copies:
                                     # copy on write to avoid affecting potential other
                                     # branches.  when there are no other branches, this
                                     # could be avoided.
                                     newcopies = copies.copy()
                                 newcopies[f] = (c, None)
                         othercopies = all_copies.get(c)
                         if othercopies is None:
                             all_copies[c] = newcopies
                         else:
                             # we are the second parent to work on c, we need to merge our
                             # work with the other.
                             #
                             # In case of conflict, parent 1 take precedence over parent 2.
                             # This is an arbitrary choice made anew when implementing
                             # changeset based copies. It was made without regards with
                             # potential filelog related behavior.
                             if parent == 1:
                                 _merge_copies_dict_extra(
                                     othercopies, newcopies, isancestor, ismerged
                                 )
                             else:
                                 _merge_copies_dict_extra(
                                     newcopies, othercopies, isancestor, ismerged
                                 )
                                 all_copies[c] = newcopies
                 final_copies = {}
                 for dest, (tt, source) in all_copies[targetrev].items():
                     if source is not None:
                         final_copies[dest] = source
                 return final_copies
             def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
                 """version of `_merge_copies_dict` that works with the Google
                 specific "extra" based storage for copy information"""
                 for dest, value in major.items():
                     other = minor.get(dest)
                     if other is None:
                         minor[dest] = value
                     else:
                         new_tt = value[0]
                         other_tt = other[0]
                         if value[1] == other[1]:
                             continue
                         # content from "major" wins, unless it is older
                         # than the branch point or there is a merge
                         if (
                             new_tt == other_tt
                             or not isancestor(new_tt, other_tt)
                             or ismerged(dest)
                         ):
                             minor[dest] = value
             def _forwardcopies(a, b, base=None, match=None):
                 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
                 if base is None:
                     base = a
                 match = a.repo().narrowmatch(match)
                 # check for working copy
                 if b.rev() is None:
                     cm = _committedforwardcopies(a, b.p1(), base, match)
                     # combine copies from dirstate if necessary
                     copies = _chain(cm, _dirstatecopies(b._repo, match))
                 else:
                     copies = _committedforwardcopies(a, b, base, match)
                 return copies
             def _backwardrenames(a, b, match):
                 """find renames from a to b"""
                 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
                     return {}
                 # We don't want to pass in "match" here, since that would filter
                 # the destination by it. Since we're reversing the copies, we want
                 # to filter the source instead.
                 copies = _forwardcopies(b, a)
                 return _reverse_renames(copies, a, match)
             def _reverse_renames(copies, dst, match):
                 """given copies to context 'dst', finds renames from that context"""
                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
                 r = {}
                 for k, v in sorted(pycompat.iteritems(copies)):
                     if match and not match(v):
                         continue
                     # remove copies
                     if v in dst:
                         continue
                     r[v] = k
                 return r
             def pathcopies(x, y, match=None):
                 """find {dst@y: src@x} copy mapping for directed compare"""
                 repo = x._repo
                 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
                 if debug:
                     repo.ui.debug(
                         b'debug.copies: searching copies from %s to %s\n' % (x, y)
                     )
                 if x == y or not x or not y:
                     return {}
                 if y.rev() is None and x == y.p1():
                     if debug:
                         repo.ui.debug(b'debug.copies: search mode: dirstate\n')
                     # short-circuit to avoid issues with merge states
                     return _dirstatecopies(repo, match)
                 a = y.ancestor(x)
                 if a == x:
                     if debug:
                         repo.ui.debug(b'debug.copies: search mode: forward\n')
                     copies = _forwardcopies(x, y, match=match)
                 elif a == y:
                     if debug:
                         repo.ui.debug(b'debug.copies: search mode: backward\n')
                     copies = _backwardrenames(x, y, match=match)
                 else:
                     if debug:
                         repo.ui.debug(b'debug.copies: search mode: combined\n')
                     base = None
                     if a.rev() != nullrev:
                         base = x
+                    x_copies = _forwardcopies(a, x)
+                    y_copies = _forwardcopies(a, y, base, match=match)
+                    x_backward_renames = _reverse_renames(x_copies, x, match)
                     copies = _chain(
-                        _backwardrenames(x, a, match=match),
+                        x_backward_renames,
-                        _forwardcopies(a, y, base, match=match),
+                        y_copies,
                     )
                 _filter(x, y, copies)
                 return copies
             def mergecopies(repo, c1, c2, base):
                 """
                 Finds moves and copies between context c1 and c2 that are relevant for
                 merging. 'base' will be used as the merge base.
                 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
                 files that were moved/ copied in one merge parent and modified in another.
                 For example:
                 o          ---> 4 another commit
                 |
                 |   o      ---> 3 commit that modifies a.txt
                 |  /
                 o /        ---> 2 commit that moves a.txt to b.txt
                 |/
                 o          ---> 1 merge base
                 If we try to rebase revision 3 on revision 4, since there is no a.txt in
                 revision 4, and if user have copytrace disabled, we prints the following
                 message:
                 ```other changed <file> which local deleted```
                 Returns a tuple where:
                 "branch_copies" an instance of branch_copies.
                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.
                 This function calls different copytracing algorithms based on config.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return branch_copies(), branch_copies(), {}
                 narrowmatch = c1.repo().narrowmatch()
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return (
                         branch_copies(_dirstatecopies(repo, narrowmatch)),
                         branch_copies(),
                         {},
                     )
                 copytracing = repo.ui.config(b'experimental', b'copytrace')
                 if stringutil.parsebool(copytracing) is False:
                     # stringutil.parsebool() returns None when it is unable to parse the
                     # value, so we should rely on making sure copytracing is on such cases
                     return branch_copies(), branch_copies(), {}
                 if usechangesetcentricalgo(repo):
                     # The heuristics don't make sense when we need changeset-centric algos
                     return _fullcopytracing(repo, c1, c2, base)
                 # Copy trace disabling is explicitly below the node == p1 logic above
                 # because the logic above is required for a simple copy to be kept across a
                 # rebase.
                 if copytracing == b'heuristics':
                     # Do full copytracing if only non-public revisions are involved as
                     # that will be fast enough and will also cover the copies which could
                     # be missed by heuristics
                     if _isfullcopytraceable(repo, c1, base):
                         return _fullcopytracing(repo, c1, c2, base)
                     return _heuristicscopytracing(repo, c1, c2, base)
                 else:
                     return _fullcopytracing(repo, c1, c2, base)
             def _isfullcopytraceable(repo, c1, base):
                 """Checks that if base, source and destination are all no-public branches,
                 if yes let's use the full copytrace algorithm for increased capabilities
                 since it will be fast enough.
                 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
                 number of changesets from c1 to base such that if number of changesets are
                 more than the limit, full copytracing algorithm won't be used.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c1.mutable() and base.mutable():
                     sourcecommitlimit = repo.ui.configint(
                         b'experimental', b'copytrace.sourcecommitlimit'
                     )
                     commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
                     return commits < sourcecommitlimit
                 return False
             def _checksinglesidecopies(
                 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
             ):
                 if src not in m2:
                     # deleted on side 2
                     if src not in m1:
                         # renamed on side 1, deleted on side 2
                         renamedelete[src] = dsts1
                 elif src not in mb:
                     # Work around the "short-circuit to avoid issues with merge states"
                     # thing in pathcopies(): pathcopies(x, y) can return a copy where the
                     # destination doesn't exist in y.
                     pass
                 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
                     return
                 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
                     # modified on side 2
                     for dst in dsts1:
                         copy[dst] = src
             class branch_copies(object):
                 """Information about copies made on one side of a merge/graft.
                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.
                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.
                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.
                 "dirmove" is a mapping of detected source dir -> destination dir renames.
                 This is needed for handling changes to new files previously grafted into
                 renamed directories.
                 """
                 def __init__(
                     self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
                 ):
                     self.copy = {} if copy is None else copy
                     self.renamedelete = {} if renamedelete is None else renamedelete
                     self.dirmove = {} if dirmove is None else dirmove
                     self.movewithdir = {} if movewithdir is None else movewithdir
                 def __repr__(self):
                     return '<branch_copies\n  copy=%r\n  renamedelete=%r\n  dirmove=%r\n  movewithdir=%r\n>' % (
                         self.copy,
                         self.renamedelete,
                         self.dirmove,
                         self.movewithdir,
                     )
             def _fullcopytracing(repo, c1, c2, base):
                 """The full copytracing algorithm which finds all the new files that were
                 added from merge base up to the top commit and for each file it checks if
                 this file was copied from another file.
                 This is pretty slow when a lot of changesets are involved but will track all
                 the copies.
                 """
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 mb = base.manifest()
                 copies1 = pathcopies(base, c1)
                 copies2 = pathcopies(base, c2)
                 if not (copies1 or copies2):
                     return branch_copies(), branch_copies(), {}
                 inversecopies1 = {}
                 inversecopies2 = {}
                 for dst, src in copies1.items():
                     inversecopies1.setdefault(src, []).append(dst)
                 for dst, src in copies2.items():
                     inversecopies2.setdefault(src, []).append(dst)
                 copy1 = {}
                 copy2 = {}
                 diverge = {}
                 renamedelete1 = {}
                 renamedelete2 = {}
                 allsources = set(inversecopies1) | set(inversecopies2)
                 for src in allsources:
                     dsts1 = inversecopies1.get(src)
                     dsts2 = inversecopies2.get(src)
                     if dsts1 and dsts2:
                         # copied/renamed on both sides
                         if src not in m1 and src not in m2:
                             # renamed on both sides
                             dsts1 = set(dsts1)
                             dsts2 = set(dsts2)
                             # If there's some overlap in the rename destinations, we
                             # consider it not divergent. For example, if side 1 copies 'a'
                             # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                             # and 'd' and deletes 'a'.
                             if dsts1 & dsts2:
                                 for dst in dsts1 & dsts2:
                                     copy1[dst] = src
                                     copy2[dst] = src
                             else:
                                 diverge[src] = sorted(dsts1 | dsts2)
                         elif src in m1 and src in m2:
                             # copied on both sides
                             dsts1 = set(dsts1)
                             dsts2 = set(dsts2)
                             for dst in dsts1 & dsts2:
                                 copy1[dst] = src
                                 copy2[dst] = src
                         # TODO: Handle cases where it was renamed on one side and copied
                         # on the other side
                     elif dsts1:
                         # copied/renamed only on side 1
                         _checksinglesidecopies(
                             src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
                         )
                     elif dsts2:
                         # copied/renamed only on side 2
                         _checksinglesidecopies(
                             src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
                         )
                 # find interesting file sets from manifests
                 cache = []
                 def _get_addedfiles(idx):
                     if not cache:
                         addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
                         addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
                         u1 = sorted(addedinm1 - addedinm2)
                         u2 = sorted(addedinm2 - addedinm1)
                         cache.extend((u1, u2))
                     return cache[idx]
                 u1fn = lambda: _get_addedfiles(0)
                 u2fn = lambda: _get_addedfiles(1)
                 if repo.ui.debugflag:
                     u1 = u1fn()
                     u2 = u2fn()
                     header = b"  unmatched files in %s"
                     if u1:
                         repo.ui.debug(
                             b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1))
                         )
                     if u2:
                         repo.ui.debug(
                             b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2))
                         )
                     renamedeleteset = set()
                     divergeset = set()
                     for dsts in diverge.values():
                         divergeset.update(dsts)
                     for dsts in renamedelete1.values():
                         renamedeleteset.update(dsts)
                     for dsts in renamedelete2.values():
                         renamedeleteset.update(dsts)
                     repo.ui.debug(
                         b"  all copies found (* = to merge, ! = divergent, "
                         b"% = renamed and deleted):\n"
                     )
                     for side, copies in ((b"local", copies1), (b"remote", copies2)):
                         if not copies:
                             continue
                         repo.ui.debug(b"   on %s side:\n" % side)
                         for f in sorted(copies):
                             note = b""
                             if f in copy1 or f in copy2:
                                 note += b"*"
                             if f in divergeset:
                                 note += b"!"
                             if f in renamedeleteset:
                                 note += b"%"
                             repo.ui.debug(
                                 b"    src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                             )
                     del renamedeleteset
                     del divergeset
                 repo.ui.debug(b"  checking for directory renames\n")
                 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
                 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
                 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
                 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
                 return branch_copies1, branch_copies2, diverge
             def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
                 """Finds moved directories and files that should move with them.
                 ctx: the context for one of the sides
                 copy: files copied on the same side (as ctx)
                 fullcopy: files copied on the same side (as ctx), including those that
                           merge.manifestmerge() won't care about
                 addedfilesfn: function returning added files on the other side (compared to
                               ctx)
                 """
                 # generate a directory move map
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in pycompat.iteritems(fullcopy):
                     dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc)
                     elif dsrc in dirmove and dirmove[dsrc] != ddst:
                         # files from the same directory moved to two different places
                         invalid.add(dsrc)
                     else:
                         # looks good so far
                         dirmove[dsrc] = ddst
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del invalid
                 if not dirmove:
                     return {}, {}
                 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
                 for d in dirmove:
                     repo.ui.debug(
                         b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
                     )
                 # Sort the directories in reverse order, so we find children first
                 # For example, if dir1/ was renamed to dir2/, and dir1/subdir1/
                 # was renamed to dir2/subdir2/, we want to move dir1/subdir1/file
                 # to dir2/subdir2/file (not dir2/subdir1/file)
                 dirmove_children_first = sorted(dirmove, reverse=True)
                 movewithdir = {}
                 # check unaccounted nonoverlapping files against directory moves
                 for f in addedfilesfn():
                     if f not in fullcopy:
                         for d in dirmove_children_first:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d) :]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(
                                         b"   pending file src: '%s' -> dst: '%s'\n"
                                         % (f, df)
                                     )
                                 break
                 return dirmove, movewithdir
             def _heuristicscopytracing(repo, c1, c2, base):
                 """Fast copytracing using filename heuristics
                 Assumes that moves or renames are of following two types:
 ) Inside a directory only (same directory name but different filenames)
 ) Move from one directory to another
                                 (same filenames but different directory names)
                 Works only when there are no merge commits in the "source branch".
                 Source branch is commits from base up to c2 not including base.
                 If merge is involved it fallbacks to _fullcopytracing().
                 Can be used by setting the following config:
                     [experimental]
                     copytrace = heuristics
                 In some cases the copy/move candidates found by heuristics can be very large
                 in number and that will make the algorithm slow. The number of possible
                 candidates to check can be limited by using the config
                 `experimental.copytrace.movecandidateslimit` which defaults to 100.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c2.rev() is None:
                     c2 = c2.p1()
                 changedfiles = set()
                 m1 = c1.manifest()
                 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
                     # If base is not in c2 branch, we switch to fullcopytracing
                     repo.ui.debug(
                         b"switching to full copytracing as base is not "
                         b"an ancestor of c2\n"
                     )
                     return _fullcopytracing(repo, c1, c2, base)
                 ctx = c2
                 while ctx != base:
                     if len(ctx.parents()) == 2:
                         # To keep things simple let's not handle merges
                         repo.ui.debug(b"switching to full copytracing because of merges\n")
                         return _fullcopytracing(repo, c1, c2, base)
                     changedfiles.update(ctx.files())
                     ctx = ctx.p1()
                 copies2 = {}
                 cp = _forwardcopies(base, c2)
                 for dst, src in pycompat.iteritems(cp):
                     if src in m1:
                         copies2[dst] = src
                 # file is missing if it isn't present in the destination, but is present in
                 # the base and present in the source.
                 # Presence in the base is important to exclude added files, presence in the
                 # source is important to exclude removed files.
                 filt = lambda f: f not in m1 and f in base and f in c2
                 missingfiles = [f for f in changedfiles if filt(f)]
                 copies1 = {}
                 if missingfiles:
                     basenametofilename = collections.defaultdict(list)
                     dirnametofilename = collections.defaultdict(list)
                     for f in m1.filesnotin(base.manifest()):
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         basenametofilename[basename].append(f)
                         dirnametofilename[dirname].append(f)
                     for f in missingfiles:
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         samebasename = basenametofilename[basename]
                         samedirname = dirnametofilename[dirname]
                         movecandidates = samebasename + samedirname
                         # f is guaranteed to be present in c2, that's why
                         # c2.filectx(f) won't fail
                         f2 = c2.filectx(f)
                         # we can have a lot of candidates which can slow down the heuristics
                         # config value to limit the number of candidates moves to check
                         maxcandidates = repo.ui.configint(
                             b'experimental', b'copytrace.movecandidateslimit'
                         )
                         if len(movecandidates) > maxcandidates:
                             repo.ui.status(
                                 _(
                                     b"skipping copytracing for '%s', more "
                                     b"candidates than the limit: %d\n"
                                 )
                                 % (f, len(movecandidates))
                             )
                             continue
                         for candidate in movecandidates:
                             f1 = c1.filectx(candidate)
                             if _related(f1, f2):
                                 # if there are a few related copies then we'll merge
                                 # changes into all of them. This matches the behaviour
                                 # of upstream copytracing
                                 copies1[candidate] = f
                 return branch_copies(copies1), branch_copies(copies2), {}
             def _related(f1, f2):
                 """return True if f1 and f2 filectx have a common ancestor
                 Walk back to common ancestor to see if the two files originate
                 from the same file. Since workingfilectx's rev() is None it messes
                 up the integer comparison logic, hence the pre-step check for
                 None (f1 and f2 can only be workingfilectx's initially).
                 """
                 if f1 == f2:
                     return True  # a match
                 g1, g2 = f1.ancestors(), f2.ancestors()
                 try:
                     f1r, f2r = f1.linkrev(), f2.linkrev()
                     if f1r is None:
                         f1 = next(g1)
                     if f2r is None:
                         f2 = next(g2)
                     while True:
                         f1r, f2r = f1.linkrev(), f2.linkrev()
                         if f1r > f2r:
                             f1 = next(g1)
                         elif f2r > f1r:
                             f2 = next(g2)
                         else:  # f1 and f2 point to files in the same linkrev
                             return f1 == f2  # true if they point to the same file
                 except StopIteration:
                     return False
             def graftcopies(wctx, ctx, base):
                 """reproduce copies between base and ctx in the wctx
                 Unlike mergecopies(), this function will only consider copies between base
                 and ctx; it will ignore copies between base and wctx. Also unlike
                 mergecopies(), this function will apply copies to the working copy (instead
                 of just returning information about the copies). That makes it cheaper
                 (especially in the common case of base==ctx.p1()) and useful also when
                 experimental.copytrace=off.
                 merge.update() will have already marked most copies, but it will only
                 mark copies if it thinks the source files are related (see
                 merge._related()). It will also not mark copies if the file wasn't modified
                 on the local side. This function adds the copies that were "missed"
                 by merge.update().
                 """
                 new_copies = pathcopies(base, ctx)
                 parent = wctx.p1()
                 _filter(parent, wctx, new_copies)
                 # Extra filtering to drop copy information for files that existed before
                 # the graft. This is to handle the case of grafting a rename onto a commit
                 # that already has the rename. Otherwise the presence of copy information
                 # would result in the creation of an empty commit where we would prefer to
                 # not create one.
                 for dest, __ in list(new_copies.items()):
                     if dest in parent:
                         del new_copies[dest]
                 for dst, src in pycompat.iteritems(new_copies):
                     wctx[dst].markcopied(src)