##// END OF EJS Templates
dirstate-item: use `tracked` instead of `state` during copy detection...
marmoute -
r48909:d86875b7 default
parent child Browse files
Show More
@@ -1,1306 +1,1306 b''
1 # coding: utf8
1 # coding: utf8
2 # copies.py - copy detection for Mercurial
2 # copies.py - copy detection for Mercurial
3 #
3 #
4 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import collections
11 import collections
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15 from .node import nullrev
15 from .node import nullrev
16
16
17 from . import (
17 from . import (
18 match as matchmod,
18 match as matchmod,
19 pathutil,
19 pathutil,
20 policy,
20 policy,
21 pycompat,
21 pycompat,
22 util,
22 util,
23 )
23 )
24
24
25
25
26 from .utils import stringutil
26 from .utils import stringutil
27
27
28 from .revlogutils import (
28 from .revlogutils import (
29 flagutil,
29 flagutil,
30 sidedata as sidedatamod,
30 sidedata as sidedatamod,
31 )
31 )
32
32
33 rustmod = policy.importrust("copy_tracing")
33 rustmod = policy.importrust("copy_tracing")
34
34
35
35
36 def _filter(src, dst, t):
36 def _filter(src, dst, t):
37 """filters out invalid copies after chaining"""
37 """filters out invalid copies after chaining"""
38
38
39 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
39 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
40 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
40 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
41 # in the following table (not including trivial cases). For example, case 6
41 # in the following table (not including trivial cases). For example, case 6
42 # is where a file existed in 'src' and remained under that name in 'mid' and
42 # is where a file existed in 'src' and remained under that name in 'mid' and
43 # then was renamed between 'mid' and 'dst'.
43 # then was renamed between 'mid' and 'dst'.
44 #
44 #
45 # case src mid dst result
45 # case src mid dst result
46 # 1 x y - -
46 # 1 x y - -
47 # 2 x y y x->y
47 # 2 x y y x->y
48 # 3 x y x -
48 # 3 x y x -
49 # 4 x y z x->z
49 # 4 x y z x->z
50 # 5 - x y -
50 # 5 - x y -
51 # 6 x x y x->y
51 # 6 x x y x->y
52 #
52 #
53 # _chain() takes care of chaining the copies in 'a' and 'b', but it
53 # _chain() takes care of chaining the copies in 'a' and 'b', but it
54 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
54 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
55 # between 5 and 6, so it includes all cases in its result.
55 # between 5 and 6, so it includes all cases in its result.
56 # Cases 1, 3, and 5 are then removed by _filter().
56 # Cases 1, 3, and 5 are then removed by _filter().
57
57
58 for k, v in list(t.items()):
58 for k, v in list(t.items()):
59 if k == v: # case 3
59 if k == v: # case 3
60 del t[k]
60 del t[k]
61 elif v not in src: # case 5
61 elif v not in src: # case 5
62 # remove copies from files that didn't exist
62 # remove copies from files that didn't exist
63 del t[k]
63 del t[k]
64 elif k not in dst: # case 1
64 elif k not in dst: # case 1
65 # remove copies to files that were then removed
65 # remove copies to files that were then removed
66 del t[k]
66 del t[k]
67
67
68
68
69 def _chain(prefix, suffix):
69 def _chain(prefix, suffix):
70 """chain two sets of copies 'prefix' and 'suffix'"""
70 """chain two sets of copies 'prefix' and 'suffix'"""
71 result = prefix.copy()
71 result = prefix.copy()
72 for key, value in pycompat.iteritems(suffix):
72 for key, value in pycompat.iteritems(suffix):
73 result[key] = prefix.get(value, value)
73 result[key] = prefix.get(value, value)
74 return result
74 return result
75
75
76
76
77 def _tracefile(fctx, am, basemf):
77 def _tracefile(fctx, am, basemf):
78 """return file context that is the ancestor of fctx present in ancestor
78 """return file context that is the ancestor of fctx present in ancestor
79 manifest am
79 manifest am
80
80
81 Note: we used to try and stop after a given limit, however checking if that
81 Note: we used to try and stop after a given limit, however checking if that
82 limit is reached turned out to be very expensive. we are better off
82 limit is reached turned out to be very expensive. we are better off
83 disabling that feature."""
83 disabling that feature."""
84
84
85 for f in fctx.ancestors():
85 for f in fctx.ancestors():
86 path = f.path()
86 path = f.path()
87 if am.get(path, None) == f.filenode():
87 if am.get(path, None) == f.filenode():
88 return path
88 return path
89 if basemf and basemf.get(path, None) == f.filenode():
89 if basemf and basemf.get(path, None) == f.filenode():
90 return path
90 return path
91
91
92
92
93 def _dirstatecopies(repo, match=None):
93 def _dirstatecopies(repo, match=None):
94 ds = repo.dirstate
94 ds = repo.dirstate
95 c = ds.copies().copy()
95 c = ds.copies().copy()
96 for k in list(c):
96 for k in list(c):
97 if ds[k] not in b'anm' or (match and not match(k)):
97 if not ds.get_entry(k).tracked or (match and not match(k)):
98 del c[k]
98 del c[k]
99 return c
99 return c
100
100
101
101
102 def _computeforwardmissing(a, b, match=None):
102 def _computeforwardmissing(a, b, match=None):
103 """Computes which files are in b but not a.
103 """Computes which files are in b but not a.
104 This is its own function so extensions can easily wrap this call to see what
104 This is its own function so extensions can easily wrap this call to see what
105 files _forwardcopies is about to process.
105 files _forwardcopies is about to process.
106 """
106 """
107 ma = a.manifest()
107 ma = a.manifest()
108 mb = b.manifest()
108 mb = b.manifest()
109 return mb.filesnotin(ma, match=match)
109 return mb.filesnotin(ma, match=match)
110
110
111
111
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage always implies changeset-centric copy tracing
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    # otherwise honor the experimental read-from configuration
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
119
119
120
120
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    # the changeset-centric algorithms bypass filelog tracing entirely
    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    renames = {}

    # Computing the forward missing is quite expensive on large manifests,
    # since it compares the entire manifests. We can optimize it in the common
    # use case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().rev() == nullrev:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    if repo.ui.configbool(b'devel', b'copy-tracing.trace-all-files'):
        # devel knob: trace every file b contains, not just the missing ones
        missing = list(b.walk(match))
        # _computeforwardmissing(a, b, match=forwardmissingmatch)
        if debug:
            dbg(b'debug.copies: searching all files: %d\n' % len(missing))
    else:
        missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
        if debug:
            dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    # share one ancestry context across all traced files to amortize its cost
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            renames[f] = opath
        if debug:
            dbg(b'debug.copies: time: %f seconds\n' % (util.timer() - start))
    return renames
186
186
187
187
def _revinfo_getter(repo, match):
    """returns a function that returns the following data given a <rev>"

    * p1: revision number of first parent
    * p2: revision number of first parent
    * changes: a ChangingFiles object
    """
    # bind hot lookups to locals so the returned closure avoids repeated
    # attribute access
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    if rustmod is not None:
        # the Rust path consumes the raw sidedata blob directly

        def revinfo(rev):
            p1, p2 = parents(rev)
            raw = None
            if flags(rev) & HASCOPIESINFO:
                raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
            return (p1, p2, raw)

    else:
        # pure-python path: hand back the parsed ChangingFiles object

        def revinfo(rev):
            p1, p2 = parents(rev)
            changes = None
            if flags(rev) & HASCOPIESINFO:
                changes = changelogrevision(rev).changes
            return (p1, p2, changes)

    return revinfo
224
224
225
225
def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor"""
    cache = {}

    def _is_ancestor(anc, desc):
        # revision numbers are topologically ordered, so a higher number can
        # never be an ancestor of a lower one; equal revisions are trivially
        # their own ancestor — neither case needs the expensive callback
        if anc > desc:
            return False
        elif anc == desc:
            return True
        key = (anc, desc)
        try:
            return cache[key]
        except KeyError:
            result = cache[key] = is_ancestor(anc, desc)
            return result

    return _is_ancestor
242
242
243
243
def _changesetforwardcopies(a, b, match):
    # trivial cases: tracing from the null revision or from b itself
    if a.rev() in (nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev

    # To track rename from "A" to B, we need to gather all parent → children
    # edges that are contains in `::B` but not in `::A`.
    #
    #
    # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
    # ::a`) and also all the "roots point", ie the parents of the exclusive set
    # that belong to ::a. These are exactly all the revisions needed to express
    # the parent → children we need to combine.
    #
    # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
    # excluding paths that leads to roots that are not ancestors of `a`. We
    # keep this out of the explanation because it is hard enough without this
    # special case..

    parents = cl._uncheckedparentrevs
    graph_roots = (nullrev, nullrev)

    ancestors = cl.ancestors([a.rev()], inclusive=True)
    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    roots = set()
    has_graph_roots = False
    multi_thread = repo.ui.configbool(b'devel', b'copy-tracing.multi-thread')

    # iterate over `only(B, A)`
    for r in revs:
        ps = parents(r)
        if ps == graph_roots:
            has_graph_roots = True
        else:
            p1, p2 = ps

            # find all the "root points" (see larger comment above)
            if p1 != nullrev and p1 in ancestors:
                roots.add(p1)
            if p2 != nullrev and p2 in ancestors:
                roots.add(p2)
    if not roots:
        # no common revision to track copies from
        return {}
    if has_graph_roots:
        # this deal with the special case mentionned in the [1] footnotes. We
        # must filter out revisions that leads to non-common graphroots.
        roots = list(roots)
        m = min(roots)
        h = [b.rev()]
        roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
        roots_to_head = set(roots_to_head)
        revs = [r for r in revs if r in roots_to_head]

    if repo.filecopiesmode == b'changeset-sidedata':
        # When using side-data, we will process the edges "from" the children.
        # We iterate over the children, gathering previous collected data for
        # the parents. To know when the parents data is no longer necessary,
        # we keep a counter of how many children each revision has.
        #
        # An interesting property of `children_count` is that it only contains
        # revisions that will be relevant for an edge of the graph. So if a
        # child has a parent not in `children_count`, that edge should not be
        # processed.
        children_count = {r: 0 for r in roots}
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children_count[r] = 0
                if p in children_count:
                    children_count[p] += 1
        revinfo = _revinfo_getter(repo, match)
        with repo.changelog.reading():
            return _combine_changeset_copies(
                revs,
                children_count,
                b.rev(),
                revinfo,
                match,
                isancestor,
                multi_thread,
            )
    else:
        # When not using side-data, we will process the edges "from" the
        # parent. so we need a full mapping of the parent -> children relation.
        children = {r: [] for r in roots}
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children[r] = []
                if p in children:
                    children[p].append(r)
        x = revs.pop()
        assert x == b.rev()
        revs.extend(roots)
        revs.sort()

        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )
351
351
352
352
def _combine_changeset_copies(
    revs, children_count, targetrev, revinfo, match, isancestor, multi_thread
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children_count: a {parent: <number-of-relevant-children>} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    if rustmod is not None:
        # delegate the whole combination to the Rust implementation
        final_copies = rustmod.combine_changeset_copies(
            list(revs), children_count, targetrev, revinfo, multi_thread
        )
    else:
        isancestor = cached_is_ancestor(isancestor)

        # {rev: {dest: (introducing_rev, source-or-None)}} for pending revs
        all_copies = {}
        # iterate over all the "children" side of copy tracing "edge"
        for current_rev in revs:
            p1, p2, changes = revinfo(current_rev)
            current_copies = None
            # iterate over all parents to chain the existing data with the
            # data from the parent → child edge.
            for parent, parent_rev in ((1, p1), (2, p2)):
                if parent_rev == nullrev:
                    continue
                remaining_children = children_count.get(parent_rev)
                if remaining_children is None:
                    # edge not relevant for copy tracing (see docstring of
                    # the caller); skip it
                    continue
                remaining_children -= 1
                children_count[parent_rev] = remaining_children
                if remaining_children:
                    # other children still need the parent data: keep it
                    copies = all_copies.get(parent_rev, None)
                else:
                    # last child: the parent data can be consumed in place
                    copies = all_copies.pop(parent_rev, None)

                if copies is None:
                    # this is a root
                    newcopies = copies = {}
                elif remaining_children:
                    # copy-on-write so sibling branches are unaffected
                    newcopies = copies.copy()
                else:
                    newcopies = copies
                # chain the data in the edge with the existing data
                if changes is not None:
                    childcopies = {}
                    if parent == 1:
                        childcopies = changes.copied_from_p1
                    elif parent == 2:
                        childcopies = changes.copied_from_p2

                    if childcopies:
                        newcopies = copies.copy()
                        for dest, source in pycompat.iteritems(childcopies):
                            prev = copies.get(source)
                            if prev is not None and prev[1] is not None:
                                # chain through an earlier rename of 'source'
                                source = prev[1]
                            newcopies[dest] = (current_rev, source)
                        assert newcopies is not copies
                    if changes.removed:
                        for f in changes.removed:
                            if f in newcopies:
                                if newcopies is copies:
                                    # copy on write to avoid affecting
                                    # potential other branches. when there are
                                    # no other branches, this could be avoided.
                                    newcopies = copies.copy()
                                # record the deletion with a None source
                                newcopies[f] = (current_rev, None)
                # check potential need to combine the data from another parent
                # (for that child). See comment below for details.
                if current_copies is None:
                    current_copies = newcopies
                else:
                    # we are the second parent to work on c, we need to merge
                    # our work with the other.
                    #
                    # In case of conflict, parent 1 take precedence over
                    # parent 2. This is an arbitrary choice made anew when
                    # implementing changeset based copies. It was made without
                    # regards with potential filelog related behavior.
                    assert parent == 2
                    current_copies = _merge_copies_dict(
                        newcopies,
                        current_copies,
                        isancestor,
                        changes,
                        current_rev,
                    )
            all_copies[current_rev] = current_copies

        # filter out internal details and return a {dest: source mapping}
        final_copies = {}
        for dest, (tt, source) in all_copies[targetrev].items():
            if source is not None:
                final_copies[dest] = source
    if not alwaysmatch:
        for filename in list(final_copies.keys()):
            if not match(filename):
                del final_copies[filename]
    return final_copies
460
460
461
461
# constants to decide which side to pick with _merge_copies_dict
PICK_MINOR = 0
PICK_MAJOR = 1
PICK_EITHER = 2
466
466
467
467
def _merge_copies_dict(minor, major, isancestor, changes, current_merge):
    """merge two copies-mapping together, minor and major

    In case of conflict, value from "major" will be picked.

    - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
      ancestors of `high_rev`,

    - `ismerged(path)`: callable return True if `path` have been merged in the
      current revision,

    return the resulting dict (in practice, the "minor" object, updated)
    """
    for dest, major_value in major.items():
        minor_value = minor.get(dest)
        if minor_value is None:
            # no conflict: take the major entry as-is
            minor[dest] = major_value
            continue
        pick, overwrite = _compare_values(
            changes, isancestor, dest, minor_value, major_value
        )
        if overwrite:
            # an ambiguity was resolved here: re-anchor the entry on the
            # current merge revision, keeping the chosen side's source
            if pick == PICK_MAJOR:
                minor[dest] = (current_merge, major_value[1])
            else:
                minor[dest] = (current_merge, minor_value[1])
        elif pick == PICK_MAJOR:
            minor[dest] = major_value
    return minor
497
497
498
498
499 def _compare_values(changes, isancestor, dest, minor, major):
499 def _compare_values(changes, isancestor, dest, minor, major):
500 """compare two value within a _merge_copies_dict loop iteration
500 """compare two value within a _merge_copies_dict loop iteration
501
501
502 return (pick, overwrite).
502 return (pick, overwrite).
503
503
504 - pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER
504 - pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER
505 - overwrite is True if pick is a return of an ambiguity that needs resolution.
505 - overwrite is True if pick is a return of an ambiguity that needs resolution.
506 """
506 """
507 major_tt, major_value = major
507 major_tt, major_value = major
508 minor_tt, minor_value = minor
508 minor_tt, minor_value = minor
509
509
510 if major_tt == minor_tt:
510 if major_tt == minor_tt:
511 # if it comes from the same revision it must be the same value
511 # if it comes from the same revision it must be the same value
512 assert major_value == minor_value
512 assert major_value == minor_value
513 return PICK_EITHER, False
513 return PICK_EITHER, False
514 elif (
514 elif (
515 changes is not None
515 changes is not None
516 and minor_value is not None
516 and minor_value is not None
517 and major_value is None
517 and major_value is None
518 and dest in changes.salvaged
518 and dest in changes.salvaged
519 ):
519 ):
520 # In this case, a deletion was reverted, the "alive" value overwrite
520 # In this case, a deletion was reverted, the "alive" value overwrite
521 # the deleted one.
521 # the deleted one.
522 return PICK_MINOR, True
522 return PICK_MINOR, True
523 elif (
523 elif (
524 changes is not None
524 changes is not None
525 and major_value is not None
525 and major_value is not None
526 and minor_value is None
526 and minor_value is None
527 and dest in changes.salvaged
527 and dest in changes.salvaged
528 ):
528 ):
529 # In this case, a deletion was reverted, the "alive" value overwrite
529 # In this case, a deletion was reverted, the "alive" value overwrite
530 # the deleted one.
530 # the deleted one.
531 return PICK_MAJOR, True
531 return PICK_MAJOR, True
532 elif isancestor(minor_tt, major_tt):
532 elif isancestor(minor_tt, major_tt):
533 if changes is not None and dest in changes.merged:
533 if changes is not None and dest in changes.merged:
534 # change to dest happened on the branch without copy-source change,
534 # change to dest happened on the branch without copy-source change,
535 # so both source are valid and "major" wins.
535 # so both source are valid and "major" wins.
536 return PICK_MAJOR, True
536 return PICK_MAJOR, True
537 else:
537 else:
538 return PICK_MAJOR, False
538 return PICK_MAJOR, False
539 elif isancestor(major_tt, minor_tt):
539 elif isancestor(major_tt, minor_tt):
540 if changes is not None and dest in changes.merged:
540 if changes is not None and dest in changes.merged:
541 # change to dest happened on the branch without copy-source change,
541 # change to dest happened on the branch without copy-source change,
542 # so both source are valid and "major" wins.
542 # so both source are valid and "major" wins.
543 return PICK_MAJOR, True
543 return PICK_MAJOR, True
544 else:
544 else:
545 return PICK_MINOR, False
545 return PICK_MINOR, False
546 elif minor_value is None:
546 elif minor_value is None:
547 # in case of conflict, the "alive" side wins.
547 # in case of conflict, the "alive" side wins.
548 return PICK_MAJOR, True
548 return PICK_MAJOR, True
549 elif major_value is None:
549 elif major_value is None:
550 # in case of conflict, the "alive" side wins.
550 # in case of conflict, the "alive" side wins.
551 return PICK_MINOR, True
551 return PICK_MINOR, True
552 else:
552 else:
553 # in case of conflict where both side are alive, major wins.
553 # in case of conflict where both side are alive, major wins.
554 return PICK_MAJOR, True
554 return PICK_MAJOR, True
555
555
556
556
557 def _revinfo_getter_extra(repo):
557 def _revinfo_getter_extra(repo):
558 """return a function that return multiple data given a <rev>"i
558 """return a function that return multiple data given a <rev>"i
559
559
560 * p1: revision number of first parent
560 * p1: revision number of first parent
561 * p2: revision number of first parent
561 * p2: revision number of first parent
562 * p1copies: mapping of copies from p1
562 * p1copies: mapping of copies from p1
563 * p2copies: mapping of copies from p2
563 * p2copies: mapping of copies from p2
564 * removed: a list of removed files
564 * removed: a list of removed files
565 * ismerged: a callback to know if file was merged in that revision
565 * ismerged: a callback to know if file was merged in that revision
566 """
566 """
567 cl = repo.changelog
567 cl = repo.changelog
568 parents = cl.parentrevs
568 parents = cl.parentrevs
569
569
570 def get_ismerged(rev):
570 def get_ismerged(rev):
571 ctx = repo[rev]
571 ctx = repo[rev]
572
572
573 def ismerged(path):
573 def ismerged(path):
574 if path not in ctx.files():
574 if path not in ctx.files():
575 return False
575 return False
576 fctx = ctx[path]
576 fctx = ctx[path]
577 parents = fctx._filelog.parents(fctx._filenode)
577 parents = fctx._filelog.parents(fctx._filenode)
578 nb_parents = 0
578 nb_parents = 0
579 for n in parents:
579 for n in parents:
580 if n != repo.nullid:
580 if n != repo.nullid:
581 nb_parents += 1
581 nb_parents += 1
582 return nb_parents >= 2
582 return nb_parents >= 2
583
583
584 return ismerged
584 return ismerged
585
585
586 def revinfo(rev):
586 def revinfo(rev):
587 p1, p2 = parents(rev)
587 p1, p2 = parents(rev)
588 ctx = repo[rev]
588 ctx = repo[rev]
589 p1copies, p2copies = ctx._copies
589 p1copies, p2copies = ctx._copies
590 removed = ctx.filesremoved()
590 removed = ctx.filesremoved()
591 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
591 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
592
592
593 return revinfo
593 return revinfo
594
594
595
595
596 def _combine_changeset_copies_extra(
596 def _combine_changeset_copies_extra(
597 revs, children, targetrev, revinfo, match, isancestor
597 revs, children, targetrev, revinfo, match, isancestor
598 ):
598 ):
599 """version of `_combine_changeset_copies` that works with the Google
599 """version of `_combine_changeset_copies` that works with the Google
600 specific "extra" based storage for copy information"""
600 specific "extra" based storage for copy information"""
601 all_copies = {}
601 all_copies = {}
602 alwaysmatch = match.always()
602 alwaysmatch = match.always()
603 for r in revs:
603 for r in revs:
604 copies = all_copies.pop(r, None)
604 copies = all_copies.pop(r, None)
605 if copies is None:
605 if copies is None:
606 # this is a root
606 # this is a root
607 copies = {}
607 copies = {}
608 for i, c in enumerate(children[r]):
608 for i, c in enumerate(children[r]):
609 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
609 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
610 if r == p1:
610 if r == p1:
611 parent = 1
611 parent = 1
612 childcopies = p1copies
612 childcopies = p1copies
613 else:
613 else:
614 assert r == p2
614 assert r == p2
615 parent = 2
615 parent = 2
616 childcopies = p2copies
616 childcopies = p2copies
617 if not alwaysmatch:
617 if not alwaysmatch:
618 childcopies = {
618 childcopies = {
619 dst: src for dst, src in childcopies.items() if match(dst)
619 dst: src for dst, src in childcopies.items() if match(dst)
620 }
620 }
621 newcopies = copies
621 newcopies = copies
622 if childcopies:
622 if childcopies:
623 newcopies = copies.copy()
623 newcopies = copies.copy()
624 for dest, source in pycompat.iteritems(childcopies):
624 for dest, source in pycompat.iteritems(childcopies):
625 prev = copies.get(source)
625 prev = copies.get(source)
626 if prev is not None and prev[1] is not None:
626 if prev is not None and prev[1] is not None:
627 source = prev[1]
627 source = prev[1]
628 newcopies[dest] = (c, source)
628 newcopies[dest] = (c, source)
629 assert newcopies is not copies
629 assert newcopies is not copies
630 for f in removed:
630 for f in removed:
631 if f in newcopies:
631 if f in newcopies:
632 if newcopies is copies:
632 if newcopies is copies:
633 # copy on write to avoid affecting potential other
633 # copy on write to avoid affecting potential other
634 # branches. when there are no other branches, this
634 # branches. when there are no other branches, this
635 # could be avoided.
635 # could be avoided.
636 newcopies = copies.copy()
636 newcopies = copies.copy()
637 newcopies[f] = (c, None)
637 newcopies[f] = (c, None)
638 othercopies = all_copies.get(c)
638 othercopies = all_copies.get(c)
639 if othercopies is None:
639 if othercopies is None:
640 all_copies[c] = newcopies
640 all_copies[c] = newcopies
641 else:
641 else:
642 # we are the second parent to work on c, we need to merge our
642 # we are the second parent to work on c, we need to merge our
643 # work with the other.
643 # work with the other.
644 #
644 #
645 # In case of conflict, parent 1 take precedence over parent 2.
645 # In case of conflict, parent 1 take precedence over parent 2.
646 # This is an arbitrary choice made anew when implementing
646 # This is an arbitrary choice made anew when implementing
647 # changeset based copies. It was made without regards with
647 # changeset based copies. It was made without regards with
648 # potential filelog related behavior.
648 # potential filelog related behavior.
649 if parent == 1:
649 if parent == 1:
650 _merge_copies_dict_extra(
650 _merge_copies_dict_extra(
651 othercopies, newcopies, isancestor, ismerged
651 othercopies, newcopies, isancestor, ismerged
652 )
652 )
653 else:
653 else:
654 _merge_copies_dict_extra(
654 _merge_copies_dict_extra(
655 newcopies, othercopies, isancestor, ismerged
655 newcopies, othercopies, isancestor, ismerged
656 )
656 )
657 all_copies[c] = newcopies
657 all_copies[c] = newcopies
658
658
659 final_copies = {}
659 final_copies = {}
660 for dest, (tt, source) in all_copies[targetrev].items():
660 for dest, (tt, source) in all_copies[targetrev].items():
661 if source is not None:
661 if source is not None:
662 final_copies[dest] = source
662 final_copies[dest] = source
663 return final_copies
663 return final_copies
664
664
665
665
666 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
666 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
667 """version of `_merge_copies_dict` that works with the Google
667 """version of `_merge_copies_dict` that works with the Google
668 specific "extra" based storage for copy information"""
668 specific "extra" based storage for copy information"""
669 for dest, value in major.items():
669 for dest, value in major.items():
670 other = minor.get(dest)
670 other = minor.get(dest)
671 if other is None:
671 if other is None:
672 minor[dest] = value
672 minor[dest] = value
673 else:
673 else:
674 new_tt = value[0]
674 new_tt = value[0]
675 other_tt = other[0]
675 other_tt = other[0]
676 if value[1] == other[1]:
676 if value[1] == other[1]:
677 continue
677 continue
678 # content from "major" wins, unless it is older
678 # content from "major" wins, unless it is older
679 # than the branch point or there is a merge
679 # than the branch point or there is a merge
680 if (
680 if (
681 new_tt == other_tt
681 new_tt == other_tt
682 or not isancestor(new_tt, other_tt)
682 or not isancestor(new_tt, other_tt)
683 or ismerged(dest)
683 or ismerged(dest)
684 ):
684 ):
685 minor[dest] = value
685 minor[dest] = value
686
686
687
687
688 def _forwardcopies(a, b, base=None, match=None):
688 def _forwardcopies(a, b, base=None, match=None):
689 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
689 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
690
690
691 if base is None:
691 if base is None:
692 base = a
692 base = a
693 match = a.repo().narrowmatch(match)
693 match = a.repo().narrowmatch(match)
694 # check for working copy
694 # check for working copy
695 if b.rev() is None:
695 if b.rev() is None:
696 cm = _committedforwardcopies(a, b.p1(), base, match)
696 cm = _committedforwardcopies(a, b.p1(), base, match)
697 # combine copies from dirstate if necessary
697 # combine copies from dirstate if necessary
698 copies = _chain(cm, _dirstatecopies(b._repo, match))
698 copies = _chain(cm, _dirstatecopies(b._repo, match))
699 else:
699 else:
700 copies = _committedforwardcopies(a, b, base, match)
700 copies = _committedforwardcopies(a, b, base, match)
701 return copies
701 return copies
702
702
703
703
704 def _backwardrenames(a, b, match):
704 def _backwardrenames(a, b, match):
705 """find renames from a to b"""
705 """find renames from a to b"""
706 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
706 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
707 return {}
707 return {}
708
708
709 # We don't want to pass in "match" here, since that would filter
709 # We don't want to pass in "match" here, since that would filter
710 # the destination by it. Since we're reversing the copies, we want
710 # the destination by it. Since we're reversing the copies, we want
711 # to filter the source instead.
711 # to filter the source instead.
712 copies = _forwardcopies(b, a)
712 copies = _forwardcopies(b, a)
713 return _reverse_renames(copies, a, match)
713 return _reverse_renames(copies, a, match)
714
714
715
715
716 def _reverse_renames(copies, dst, match):
716 def _reverse_renames(copies, dst, match):
717 """given copies to context 'dst', finds renames from that context"""
717 """given copies to context 'dst', finds renames from that context"""
718 # Even though we're not taking copies into account, 1:n rename situations
718 # Even though we're not taking copies into account, 1:n rename situations
719 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
719 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
720 # arbitrarily pick one of the renames.
720 # arbitrarily pick one of the renames.
721 r = {}
721 r = {}
722 for k, v in sorted(pycompat.iteritems(copies)):
722 for k, v in sorted(pycompat.iteritems(copies)):
723 if match and not match(v):
723 if match and not match(v):
724 continue
724 continue
725 # remove copies
725 # remove copies
726 if v in dst:
726 if v in dst:
727 continue
727 continue
728 r[v] = k
728 r[v] = k
729 return r
729 return r
730
730
731
731
732 def pathcopies(x, y, match=None):
732 def pathcopies(x, y, match=None):
733 """find {dst@y: src@x} copy mapping for directed compare"""
733 """find {dst@y: src@x} copy mapping for directed compare"""
734 repo = x._repo
734 repo = x._repo
735 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
735 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
736 if debug:
736 if debug:
737 repo.ui.debug(
737 repo.ui.debug(
738 b'debug.copies: searching copies from %s to %s\n' % (x, y)
738 b'debug.copies: searching copies from %s to %s\n' % (x, y)
739 )
739 )
740 if x == y or not x or not y:
740 if x == y or not x or not y:
741 return {}
741 return {}
742 if y.rev() is None and x == y.p1():
742 if y.rev() is None and x == y.p1():
743 if debug:
743 if debug:
744 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
744 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
745 # short-circuit to avoid issues with merge states
745 # short-circuit to avoid issues with merge states
746 return _dirstatecopies(repo, match)
746 return _dirstatecopies(repo, match)
747 a = y.ancestor(x)
747 a = y.ancestor(x)
748 if a == x:
748 if a == x:
749 if debug:
749 if debug:
750 repo.ui.debug(b'debug.copies: search mode: forward\n')
750 repo.ui.debug(b'debug.copies: search mode: forward\n')
751 copies = _forwardcopies(x, y, match=match)
751 copies = _forwardcopies(x, y, match=match)
752 elif a == y:
752 elif a == y:
753 if debug:
753 if debug:
754 repo.ui.debug(b'debug.copies: search mode: backward\n')
754 repo.ui.debug(b'debug.copies: search mode: backward\n')
755 copies = _backwardrenames(x, y, match=match)
755 copies = _backwardrenames(x, y, match=match)
756 else:
756 else:
757 if debug:
757 if debug:
758 repo.ui.debug(b'debug.copies: search mode: combined\n')
758 repo.ui.debug(b'debug.copies: search mode: combined\n')
759 base = None
759 base = None
760 if a.rev() != nullrev:
760 if a.rev() != nullrev:
761 base = x
761 base = x
762 x_copies = _forwardcopies(a, x)
762 x_copies = _forwardcopies(a, x)
763 y_copies = _forwardcopies(a, y, base, match=match)
763 y_copies = _forwardcopies(a, y, base, match=match)
764 same_keys = set(x_copies) & set(y_copies)
764 same_keys = set(x_copies) & set(y_copies)
765 for k in same_keys:
765 for k in same_keys:
766 if x_copies.get(k) == y_copies.get(k):
766 if x_copies.get(k) == y_copies.get(k):
767 del x_copies[k]
767 del x_copies[k]
768 del y_copies[k]
768 del y_copies[k]
769 x_backward_renames = _reverse_renames(x_copies, x, match)
769 x_backward_renames = _reverse_renames(x_copies, x, match)
770 copies = _chain(
770 copies = _chain(
771 x_backward_renames,
771 x_backward_renames,
772 y_copies,
772 y_copies,
773 )
773 )
774 _filter(x, y, copies)
774 _filter(x, y, copies)
775 return copies
775 return copies
776
776
777
777
778 def mergecopies(repo, c1, c2, base):
778 def mergecopies(repo, c1, c2, base):
779 """
779 """
780 Finds moves and copies between context c1 and c2 that are relevant for
780 Finds moves and copies between context c1 and c2 that are relevant for
781 merging. 'base' will be used as the merge base.
781 merging. 'base' will be used as the merge base.
782
782
783 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
783 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
784 files that were moved/ copied in one merge parent and modified in another.
784 files that were moved/ copied in one merge parent and modified in another.
785 For example:
785 For example:
786
786
787 o ---> 4 another commit
787 o ---> 4 another commit
788 |
788 |
789 | o ---> 3 commit that modifies a.txt
789 | o ---> 3 commit that modifies a.txt
790 | /
790 | /
791 o / ---> 2 commit that moves a.txt to b.txt
791 o / ---> 2 commit that moves a.txt to b.txt
792 |/
792 |/
793 o ---> 1 merge base
793 o ---> 1 merge base
794
794
795 If we try to rebase revision 3 on revision 4, since there is no a.txt in
795 If we try to rebase revision 3 on revision 4, since there is no a.txt in
796 revision 4, and if user have copytrace disabled, we prints the following
796 revision 4, and if user have copytrace disabled, we prints the following
797 message:
797 message:
798
798
799 ```other changed <file> which local deleted```
799 ```other changed <file> which local deleted```
800
800
801 Returns a tuple where:
801 Returns a tuple where:
802
802
803 "branch_copies" an instance of branch_copies.
803 "branch_copies" an instance of branch_copies.
804
804
805 "diverge" is a mapping of source name -> list of destination names
805 "diverge" is a mapping of source name -> list of destination names
806 for divergent renames.
806 for divergent renames.
807
807
808 This function calls different copytracing algorithms based on config.
808 This function calls different copytracing algorithms based on config.
809 """
809 """
810 # avoid silly behavior for update from empty dir
810 # avoid silly behavior for update from empty dir
811 if not c1 or not c2 or c1 == c2:
811 if not c1 or not c2 or c1 == c2:
812 return branch_copies(), branch_copies(), {}
812 return branch_copies(), branch_copies(), {}
813
813
814 narrowmatch = c1.repo().narrowmatch()
814 narrowmatch = c1.repo().narrowmatch()
815
815
816 # avoid silly behavior for parent -> working dir
816 # avoid silly behavior for parent -> working dir
817 if c2.node() is None and c1.node() == repo.dirstate.p1():
817 if c2.node() is None and c1.node() == repo.dirstate.p1():
818 return (
818 return (
819 branch_copies(_dirstatecopies(repo, narrowmatch)),
819 branch_copies(_dirstatecopies(repo, narrowmatch)),
820 branch_copies(),
820 branch_copies(),
821 {},
821 {},
822 )
822 )
823
823
824 copytracing = repo.ui.config(b'experimental', b'copytrace')
824 copytracing = repo.ui.config(b'experimental', b'copytrace')
825 if stringutil.parsebool(copytracing) is False:
825 if stringutil.parsebool(copytracing) is False:
826 # stringutil.parsebool() returns None when it is unable to parse the
826 # stringutil.parsebool() returns None when it is unable to parse the
827 # value, so we should rely on making sure copytracing is on such cases
827 # value, so we should rely on making sure copytracing is on such cases
828 return branch_copies(), branch_copies(), {}
828 return branch_copies(), branch_copies(), {}
829
829
830 if usechangesetcentricalgo(repo):
830 if usechangesetcentricalgo(repo):
831 # The heuristics don't make sense when we need changeset-centric algos
831 # The heuristics don't make sense when we need changeset-centric algos
832 return _fullcopytracing(repo, c1, c2, base)
832 return _fullcopytracing(repo, c1, c2, base)
833
833
834 # Copy trace disabling is explicitly below the node == p1 logic above
834 # Copy trace disabling is explicitly below the node == p1 logic above
835 # because the logic above is required for a simple copy to be kept across a
835 # because the logic above is required for a simple copy to be kept across a
836 # rebase.
836 # rebase.
837 if copytracing == b'heuristics':
837 if copytracing == b'heuristics':
838 # Do full copytracing if only non-public revisions are involved as
838 # Do full copytracing if only non-public revisions are involved as
839 # that will be fast enough and will also cover the copies which could
839 # that will be fast enough and will also cover the copies which could
840 # be missed by heuristics
840 # be missed by heuristics
841 if _isfullcopytraceable(repo, c1, base):
841 if _isfullcopytraceable(repo, c1, base):
842 return _fullcopytracing(repo, c1, c2, base)
842 return _fullcopytracing(repo, c1, c2, base)
843 return _heuristicscopytracing(repo, c1, c2, base)
843 return _heuristicscopytracing(repo, c1, c2, base)
844 else:
844 else:
845 return _fullcopytracing(repo, c1, c2, base)
845 return _fullcopytracing(repo, c1, c2, base)
846
846
847
847
848 def _isfullcopytraceable(repo, c1, base):
848 def _isfullcopytraceable(repo, c1, base):
849 """Checks that if base, source and destination are all no-public branches,
849 """Checks that if base, source and destination are all no-public branches,
850 if yes let's use the full copytrace algorithm for increased capabilities
850 if yes let's use the full copytrace algorithm for increased capabilities
851 since it will be fast enough.
851 since it will be fast enough.
852
852
853 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
853 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
854 number of changesets from c1 to base such that if number of changesets are
854 number of changesets from c1 to base such that if number of changesets are
855 more than the limit, full copytracing algorithm won't be used.
855 more than the limit, full copytracing algorithm won't be used.
856 """
856 """
857 if c1.rev() is None:
857 if c1.rev() is None:
858 c1 = c1.p1()
858 c1 = c1.p1()
859 if c1.mutable() and base.mutable():
859 if c1.mutable() and base.mutable():
860 sourcecommitlimit = repo.ui.configint(
860 sourcecommitlimit = repo.ui.configint(
861 b'experimental', b'copytrace.sourcecommitlimit'
861 b'experimental', b'copytrace.sourcecommitlimit'
862 )
862 )
863 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
863 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
864 return commits < sourcecommitlimit
864 return commits < sourcecommitlimit
865 return False
865 return False
866
866
867
867
868 def _checksinglesidecopies(
868 def _checksinglesidecopies(
869 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
869 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
870 ):
870 ):
871 if src not in m2:
871 if src not in m2:
872 # deleted on side 2
872 # deleted on side 2
873 if src not in m1:
873 if src not in m1:
874 # renamed on side 1, deleted on side 2
874 # renamed on side 1, deleted on side 2
875 renamedelete[src] = dsts1
875 renamedelete[src] = dsts1
876 elif src not in mb:
876 elif src not in mb:
877 # Work around the "short-circuit to avoid issues with merge states"
877 # Work around the "short-circuit to avoid issues with merge states"
878 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
878 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
879 # destination doesn't exist in y.
879 # destination doesn't exist in y.
880 pass
880 pass
881 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
881 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
882 return
882 return
883 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
883 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
884 # modified on side 2
884 # modified on side 2
885 for dst in dsts1:
885 for dst in dsts1:
886 copy[dst] = src
886 copy[dst] = src
887
887
888
888
889 class branch_copies(object):
889 class branch_copies(object):
890 """Information about copies made on one side of a merge/graft.
890 """Information about copies made on one side of a merge/graft.
891
891
892 "copy" is a mapping from destination name -> source name,
892 "copy" is a mapping from destination name -> source name,
893 where source is in c1 and destination is in c2 or vice-versa.
893 where source is in c1 and destination is in c2 or vice-versa.
894
894
895 "movewithdir" is a mapping from source name -> destination name,
895 "movewithdir" is a mapping from source name -> destination name,
896 where the file at source present in one context but not the other
896 where the file at source present in one context but not the other
897 needs to be moved to destination by the merge process, because the
897 needs to be moved to destination by the merge process, because the
898 other context moved the directory it is in.
898 other context moved the directory it is in.
899
899
900 "renamedelete" is a mapping of source name -> list of destination
900 "renamedelete" is a mapping of source name -> list of destination
901 names for files deleted in c1 that were renamed in c2 or vice-versa.
901 names for files deleted in c1 that were renamed in c2 or vice-versa.
902
902
903 "dirmove" is a mapping of detected source dir -> destination dir renames.
903 "dirmove" is a mapping of detected source dir -> destination dir renames.
904 This is needed for handling changes to new files previously grafted into
904 This is needed for handling changes to new files previously grafted into
905 renamed directories.
905 renamed directories.
906 """
906 """
907
907
908 def __init__(
908 def __init__(
909 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
909 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
910 ):
910 ):
911 self.copy = {} if copy is None else copy
911 self.copy = {} if copy is None else copy
912 self.renamedelete = {} if renamedelete is None else renamedelete
912 self.renamedelete = {} if renamedelete is None else renamedelete
913 self.dirmove = {} if dirmove is None else dirmove
913 self.dirmove = {} if dirmove is None else dirmove
914 self.movewithdir = {} if movewithdir is None else movewithdir
914 self.movewithdir = {} if movewithdir is None else movewithdir
915
915
916 def __repr__(self):
916 def __repr__(self):
917 return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
917 return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
918 self.copy,
918 self.copy,
919 self.renamedelete,
919 self.renamedelete,
920 self.dirmove,
920 self.dirmove,
921 self.movewithdir,
921 self.movewithdir,
922 )
922 )
923
923
924
924
925 def _fullcopytracing(repo, c1, c2, base):
925 def _fullcopytracing(repo, c1, c2, base):
926 """The full copytracing algorithm which finds all the new files that were
926 """The full copytracing algorithm which finds all the new files that were
927 added from merge base up to the top commit and for each file it checks if
927 added from merge base up to the top commit and for each file it checks if
928 this file was copied from another file.
928 this file was copied from another file.
929
929
930 This is pretty slow when a lot of changesets are involved but will track all
930 This is pretty slow when a lot of changesets are involved but will track all
931 the copies.
931 the copies.
932 """
932 """
933 m1 = c1.manifest()
933 m1 = c1.manifest()
934 m2 = c2.manifest()
934 m2 = c2.manifest()
935 mb = base.manifest()
935 mb = base.manifest()
936
936
937 copies1 = pathcopies(base, c1)
937 copies1 = pathcopies(base, c1)
938 copies2 = pathcopies(base, c2)
938 copies2 = pathcopies(base, c2)
939
939
940 if not (copies1 or copies2):
940 if not (copies1 or copies2):
941 return branch_copies(), branch_copies(), {}
941 return branch_copies(), branch_copies(), {}
942
942
943 inversecopies1 = {}
943 inversecopies1 = {}
944 inversecopies2 = {}
944 inversecopies2 = {}
945 for dst, src in copies1.items():
945 for dst, src in copies1.items():
946 inversecopies1.setdefault(src, []).append(dst)
946 inversecopies1.setdefault(src, []).append(dst)
947 for dst, src in copies2.items():
947 for dst, src in copies2.items():
948 inversecopies2.setdefault(src, []).append(dst)
948 inversecopies2.setdefault(src, []).append(dst)
949
949
950 copy1 = {}
950 copy1 = {}
951 copy2 = {}
951 copy2 = {}
952 diverge = {}
952 diverge = {}
953 renamedelete1 = {}
953 renamedelete1 = {}
954 renamedelete2 = {}
954 renamedelete2 = {}
955 allsources = set(inversecopies1) | set(inversecopies2)
955 allsources = set(inversecopies1) | set(inversecopies2)
956 for src in allsources:
956 for src in allsources:
957 dsts1 = inversecopies1.get(src)
957 dsts1 = inversecopies1.get(src)
958 dsts2 = inversecopies2.get(src)
958 dsts2 = inversecopies2.get(src)
959 if dsts1 and dsts2:
959 if dsts1 and dsts2:
960 # copied/renamed on both sides
960 # copied/renamed on both sides
961 if src not in m1 and src not in m2:
961 if src not in m1 and src not in m2:
962 # renamed on both sides
962 # renamed on both sides
963 dsts1 = set(dsts1)
963 dsts1 = set(dsts1)
964 dsts2 = set(dsts2)
964 dsts2 = set(dsts2)
965 # If there's some overlap in the rename destinations, we
965 # If there's some overlap in the rename destinations, we
966 # consider it not divergent. For example, if side 1 copies 'a'
966 # consider it not divergent. For example, if side 1 copies 'a'
967 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
967 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
968 # and 'd' and deletes 'a'.
968 # and 'd' and deletes 'a'.
969 if dsts1 & dsts2:
969 if dsts1 & dsts2:
970 for dst in dsts1 & dsts2:
970 for dst in dsts1 & dsts2:
971 copy1[dst] = src
971 copy1[dst] = src
972 copy2[dst] = src
972 copy2[dst] = src
973 else:
973 else:
974 diverge[src] = sorted(dsts1 | dsts2)
974 diverge[src] = sorted(dsts1 | dsts2)
975 elif src in m1 and src in m2:
975 elif src in m1 and src in m2:
976 # copied on both sides
976 # copied on both sides
977 dsts1 = set(dsts1)
977 dsts1 = set(dsts1)
978 dsts2 = set(dsts2)
978 dsts2 = set(dsts2)
979 for dst in dsts1 & dsts2:
979 for dst in dsts1 & dsts2:
980 copy1[dst] = src
980 copy1[dst] = src
981 copy2[dst] = src
981 copy2[dst] = src
982 # TODO: Handle cases where it was renamed on one side and copied
982 # TODO: Handle cases where it was renamed on one side and copied
983 # on the other side
983 # on the other side
984 elif dsts1:
984 elif dsts1:
985 # copied/renamed only on side 1
985 # copied/renamed only on side 1
986 _checksinglesidecopies(
986 _checksinglesidecopies(
987 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
987 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
988 )
988 )
989 elif dsts2:
989 elif dsts2:
990 # copied/renamed only on side 2
990 # copied/renamed only on side 2
991 _checksinglesidecopies(
991 _checksinglesidecopies(
992 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
992 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
993 )
993 )
994
994
995 # find interesting file sets from manifests
995 # find interesting file sets from manifests
996 cache = []
996 cache = []
997
997
998 def _get_addedfiles(idx):
998 def _get_addedfiles(idx):
999 if not cache:
999 if not cache:
1000 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
1000 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
1001 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
1001 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
1002 u1 = sorted(addedinm1 - addedinm2)
1002 u1 = sorted(addedinm1 - addedinm2)
1003 u2 = sorted(addedinm2 - addedinm1)
1003 u2 = sorted(addedinm2 - addedinm1)
1004 cache.extend((u1, u2))
1004 cache.extend((u1, u2))
1005 return cache[idx]
1005 return cache[idx]
1006
1006
1007 u1fn = lambda: _get_addedfiles(0)
1007 u1fn = lambda: _get_addedfiles(0)
1008 u2fn = lambda: _get_addedfiles(1)
1008 u2fn = lambda: _get_addedfiles(1)
1009 if repo.ui.debugflag:
1009 if repo.ui.debugflag:
1010 u1 = u1fn()
1010 u1 = u1fn()
1011 u2 = u2fn()
1011 u2 = u2fn()
1012
1012
1013 header = b" unmatched files in %s"
1013 header = b" unmatched files in %s"
1014 if u1:
1014 if u1:
1015 repo.ui.debug(
1015 repo.ui.debug(
1016 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
1016 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
1017 )
1017 )
1018 if u2:
1018 if u2:
1019 repo.ui.debug(
1019 repo.ui.debug(
1020 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
1020 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
1021 )
1021 )
1022
1022
1023 renamedeleteset = set()
1023 renamedeleteset = set()
1024 divergeset = set()
1024 divergeset = set()
1025 for dsts in diverge.values():
1025 for dsts in diverge.values():
1026 divergeset.update(dsts)
1026 divergeset.update(dsts)
1027 for dsts in renamedelete1.values():
1027 for dsts in renamedelete1.values():
1028 renamedeleteset.update(dsts)
1028 renamedeleteset.update(dsts)
1029 for dsts in renamedelete2.values():
1029 for dsts in renamedelete2.values():
1030 renamedeleteset.update(dsts)
1030 renamedeleteset.update(dsts)
1031
1031
1032 repo.ui.debug(
1032 repo.ui.debug(
1033 b" all copies found (* = to merge, ! = divergent, "
1033 b" all copies found (* = to merge, ! = divergent, "
1034 b"% = renamed and deleted):\n"
1034 b"% = renamed and deleted):\n"
1035 )
1035 )
1036 for side, copies in ((b"local", copies1), (b"remote", copies2)):
1036 for side, copies in ((b"local", copies1), (b"remote", copies2)):
1037 if not copies:
1037 if not copies:
1038 continue
1038 continue
1039 repo.ui.debug(b" on %s side:\n" % side)
1039 repo.ui.debug(b" on %s side:\n" % side)
1040 for f in sorted(copies):
1040 for f in sorted(copies):
1041 note = b""
1041 note = b""
1042 if f in copy1 or f in copy2:
1042 if f in copy1 or f in copy2:
1043 note += b"*"
1043 note += b"*"
1044 if f in divergeset:
1044 if f in divergeset:
1045 note += b"!"
1045 note += b"!"
1046 if f in renamedeleteset:
1046 if f in renamedeleteset:
1047 note += b"%"
1047 note += b"%"
1048 repo.ui.debug(
1048 repo.ui.debug(
1049 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
1049 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
1050 )
1050 )
1051 del renamedeleteset
1051 del renamedeleteset
1052 del divergeset
1052 del divergeset
1053
1053
1054 repo.ui.debug(b" checking for directory renames\n")
1054 repo.ui.debug(b" checking for directory renames\n")
1055
1055
1056 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
1056 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
1057 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
1057 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
1058
1058
1059 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
1059 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
1060 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
1060 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
1061
1061
1062 return branch_copies1, branch_copies2, diverge
1062 return branch_copies1, branch_copies2, diverge
1063
1063
1064
1064
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Detect wholesale directory moves and the files that should follow them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
        merge.manifestmerge() won't care about
    addedfilesfn: zero-argument callable returning added files on the other
        side (compared to ctx)

    Returns a pair ``(dirmove, movewithdir)``: a map of moved directory
    prefixes (with trailing ``/``) to their destinations, and a map of newly
    added files to the destination implied by their directory's move.
    """
    # Candidate directory renames; a source directory is ruled out as soon
    # as we see evidence it was not moved wholesale.
    uninteresting = set()
    moved = {}

    # Each copied file hints at a potential directory move, which is when
    # all the files in a directory are moved to a new directory.
    for dst, src in pycompat.iteritems(fullcopy):
        srcdir = pathutil.dirname(src)
        dstdir = pathutil.dirname(dst)
        if srcdir in uninteresting:
            # already seen to be uninteresting
            continue
        if (ctx.hasdir(srcdir) and ctx.hasdir(dstdir)) or (
            srcdir in moved and moved[srcdir] != dstdir
        ):
            # either the directory wasn't entirely moved locally, or files
            # from the same directory moved to two different places
            uninteresting.add(srcdir)
        else:
            # looks good so far
            moved[srcdir] = dstdir

    for srcdir in uninteresting:
        moved.pop(srcdir, None)
    del uninteresting

    if not moved:
        return {}, {}

    # normalize to trailing-slash prefixes for the startswith() tests below
    moved = {k + b"/": v + b"/" for k, v in pycompat.iteritems(moved)}

    for d in moved:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, moved[d])
        )

    # Sort the directories in reverse order, so we find children first.
    # For example, if dir1/ was renamed to dir2/, and dir1/subdir1/
    # was renamed to dir2/subdir2/, we want to move dir1/subdir1/file
    # to dir2/subdir2/file (not dir2/subdir1/file).
    children_first = sorted(moved, reverse=True)

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfilesfn():
        if f in fullcopy:
            continue
        for d in children_first:
            if not f.startswith(d):
                continue
            # new file added in a directory that was moved, move it
            df = moved[d] + f[len(d) :]
            if df not in copy:
                movewithdir[f] = df
                repo.ui.debug(
                    b"   pending file src: '%s' -> dst: '%s'\n" % (f, df)
                )
            break

    return moved, movewithdir
1134
1134
1135
1135
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # working contexts have no revision; use their first parent instead
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # copies recorded between base and c2 whose source still exists in c1
    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    copies1 = {}
    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        # index the files added on side 1 by basename and dirname so each
        # missing file can cheaply find its rename candidates
        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (hoisted out of the loop below: the value is loop-invariant)
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1243
1243
1244
1244
1245 def _related(f1, f2):
1245 def _related(f1, f2):
1246 """return True if f1 and f2 filectx have a common ancestor
1246 """return True if f1 and f2 filectx have a common ancestor
1247
1247
1248 Walk back to common ancestor to see if the two files originate
1248 Walk back to common ancestor to see if the two files originate
1249 from the same file. Since workingfilectx's rev() is None it messes
1249 from the same file. Since workingfilectx's rev() is None it messes
1250 up the integer comparison logic, hence the pre-step check for
1250 up the integer comparison logic, hence the pre-step check for
1251 None (f1 and f2 can only be workingfilectx's initially).
1251 None (f1 and f2 can only be workingfilectx's initially).
1252 """
1252 """
1253
1253
1254 if f1 == f2:
1254 if f1 == f2:
1255 return True # a match
1255 return True # a match
1256
1256
1257 g1, g2 = f1.ancestors(), f2.ancestors()
1257 g1, g2 = f1.ancestors(), f2.ancestors()
1258 try:
1258 try:
1259 f1r, f2r = f1.linkrev(), f2.linkrev()
1259 f1r, f2r = f1.linkrev(), f2.linkrev()
1260
1260
1261 if f1r is None:
1261 if f1r is None:
1262 f1 = next(g1)
1262 f1 = next(g1)
1263 if f2r is None:
1263 if f2r is None:
1264 f2 = next(g2)
1264 f2 = next(g2)
1265
1265
1266 while True:
1266 while True:
1267 f1r, f2r = f1.linkrev(), f2.linkrev()
1267 f1r, f2r = f1.linkrev(), f2.linkrev()
1268 if f1r > f2r:
1268 if f1r > f2r:
1269 f1 = next(g1)
1269 f1 = next(g1)
1270 elif f2r > f1r:
1270 elif f2r > f1r:
1271 f2 = next(g2)
1271 f2 = next(g2)
1272 else: # f1 and f2 point to files in the same linkrev
1272 else: # f1 and f2 point to files in the same linkrev
1273 return f1 == f2 # true if they point to the same file
1273 return f1 == f2 # true if they point to the same file
1274 except StopIteration:
1274 except StopIteration:
1275 return False
1275 return False
1276
1276
1277
1277
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    new_copies = pathcopies(base, ctx)
    parent = wctx.p1()
    # drop copies that become invalid when chained onto the working parent
    _filter(parent, wctx, new_copies)
    # Extra filtering to drop copy information for files that existed before
    # the graft. This is to handle the case of grafting a rename onto a commit
    # that already has the rename. Otherwise the presence of copy information
    # would result in the creation of an empty commit where we would prefer to
    # not create one.
    preexisting = [dest for dest in new_copies if dest in parent]
    for dest in preexisting:
        del new_copies[dest]
    # record the surviving copies directly on the working-copy file contexts
    for dst, src in pycompat.iteritems(new_copies):
        wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now