##// END OF EJS Templates
copies: fix some documentation typos...
Matt Harbison -
r49307:19e6446c default
parent child Browse files
Show More
@@ -1,1305 +1,1305 b''
1 # coding: utf8
1 # coding: utf8
2 # copies.py - copy detection for Mercurial
2 # copies.py - copy detection for Mercurial
3 #
3 #
4 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import collections
11 import collections
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15 from .node import nullrev
15 from .node import nullrev
16
16
17 from . import (
17 from . import (
18 match as matchmod,
18 match as matchmod,
19 pathutil,
19 pathutil,
20 policy,
20 policy,
21 pycompat,
21 pycompat,
22 util,
22 util,
23 )
23 )
24
24
25
25
26 from .utils import stringutil
26 from .utils import stringutil
27
27
28 from .revlogutils import (
28 from .revlogutils import (
29 flagutil,
29 flagutil,
30 sidedata as sidedatamod,
30 sidedata as sidedatamod,
31 )
31 )
32
32
33 rustmod = policy.importrust("copy_tracing")
33 rustmod = policy.importrust("copy_tracing")
34
34
35
35
36 def _filter(src, dst, t):
36 def _filter(src, dst, t):
37 """filters out invalid copies after chaining"""
37 """filters out invalid copies after chaining"""
38
38
39 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
39 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
40 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
40 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
41 # in the following table (not including trivial cases). For example, case 6
41 # in the following table (not including trivial cases). For example, case 6
42 # is where a file existed in 'src' and remained under that name in 'mid' and
42 # is where a file existed in 'src' and remained under that name in 'mid' and
43 # then was renamed between 'mid' and 'dst'.
43 # then was renamed between 'mid' and 'dst'.
44 #
44 #
45 # case src mid dst result
45 # case src mid dst result
46 # 1 x y - -
46 # 1 x y - -
47 # 2 x y y x->y
47 # 2 x y y x->y
48 # 3 x y x -
48 # 3 x y x -
49 # 4 x y z x->z
49 # 4 x y z x->z
50 # 5 - x y -
50 # 5 - x y -
51 # 6 x x y x->y
51 # 6 x x y x->y
52 #
52 #
53 # _chain() takes care of chaining the copies in 'a' and 'b', but it
53 # _chain() takes care of chaining the copies in 'a' and 'b', but it
54 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
54 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
55 # between 5 and 6, so it includes all cases in its result.
55 # between 5 and 6, so it includes all cases in its result.
56 # Cases 1, 3, and 5 are then removed by _filter().
56 # Cases 1, 3, and 5 are then removed by _filter().
57
57
58 for k, v in list(t.items()):
58 for k, v in list(t.items()):
59 if k == v: # case 3
59 if k == v: # case 3
60 del t[k]
60 del t[k]
61 elif v not in src: # case 5
61 elif v not in src: # case 5
62 # remove copies from files that didn't exist
62 # remove copies from files that didn't exist
63 del t[k]
63 del t[k]
64 elif k not in dst: # case 1
64 elif k not in dst: # case 1
65 # remove copies to files that were then removed
65 # remove copies to files that were then removed
66 del t[k]
66 del t[k]
67
67
68
68
def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    chained = prefix.copy()
    for dest, mid in pycompat.iteritems(suffix):
        # when the intermediate source was itself a copy destination in
        # `prefix`, collapse the chain down to the original source
        chained[dest] = prefix.get(mid, mid)
    return chained
75
75
76
76
77 def _tracefile(fctx, am, basemf):
77 def _tracefile(fctx, am, basemf):
78 """return file context that is the ancestor of fctx present in ancestor
78 """return file context that is the ancestor of fctx present in ancestor
79 manifest am
79 manifest am
80
80
81 Note: we used to try and stop after a given limit, however checking if that
81 Note: we used to try and stop after a given limit, however checking if that
82 limit is reached turned out to be very expensive. we are better off
82 limit is reached turned out to be very expensive. we are better off
83 disabling that feature."""
83 disabling that feature."""
84
84
85 for f in fctx.ancestors():
85 for f in fctx.ancestors():
86 path = f.path()
86 path = f.path()
87 if am.get(path, None) == f.filenode():
87 if am.get(path, None) == f.filenode():
88 return path
88 return path
89 if basemf and basemf.get(path, None) == f.filenode():
89 if basemf and basemf.get(path, None) == f.filenode():
90 return path
90 return path
91
91
92
92
93 def _dirstatecopies(repo, match=None):
93 def _dirstatecopies(repo, match=None):
94 ds = repo.dirstate
94 ds = repo.dirstate
95 c = ds.copies().copy()
95 c = ds.copies().copy()
96 for k in list(c):
96 for k in list(c):
97 if not ds.get_entry(k).tracked or (match and not match(k)):
97 if not ds.get_entry(k).tracked or (match and not match(k)):
98 del c[k]
98 del c[k]
99 return c
99 return c
100
100
101
101
102 def _computeforwardmissing(a, b, match=None):
102 def _computeforwardmissing(a, b, match=None):
103 """Computes which files are in b but not a.
103 """Computes which files are in b but not a.
104 This is its own function so extensions can easily wrap this call to see what
104 This is its own function so extensions can easily wrap this call to see what
105 files _forwardcopies is about to process.
105 files _forwardcopies is about to process.
106 """
106 """
107 ma = a.manifest()
107 ma = a.manifest()
108 mb = b.manifest()
108 mb = b.manifest()
109 return mb.filesnotin(ma, match=match)
109 return mb.filesnotin(ma, match=match)
110
110
111
111
def usechangesetcentricalgo(repo):
    """report whether changeset-centric copy algorithms should be used

    True when the repository stores copy metadata in changeset sidedata,
    or when the experimental `copies.read-from` setting selects changeset
    data over filelog data."""
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    source = repo.ui.config(b'experimental', b'copies.read-from')
    return source in (b'changeset-only', b'compatibility')
119
119
120
120
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {dst: src} mapping of copies made between changeset ``a`` and
    changeset ``b``.  ``base`` (possibly None) supplies an extra manifest
    that a file trace is also allowed to stop at (see _tracefile);
    ``match`` restricts the files considered.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    # the changeset-centric algorithms read copy data from the changesets
    # themselves and skip the filelog-based tracing below entirely
    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().rev() == nullrev:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    if repo.ui.configbool(b'devel', b'copy-tracing.trace-all-files'):
        # devel mode: trace every file in `b`, not only the missing ones
        missing = list(b.walk(match))
        # _computeforwardmissing(a, b, match=forwardmissingmatch)
        if debug:
            dbg(b'debug.copies: searching all files: %d\n' % len(missing))
    else:
        missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
        if debug:
            dbg(
                b'debug.copies: missing files to search: %d\n'
                % len(missing)
            )

    # one ancestry context (::b, inclusive) is shared by every traced file
    # context below
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
            if debug:
                dbg(
                    b'debug.copies: time: %f seconds\n'
                    % (util.timer() - start)
                )
    return cm
186
186
187
187
def _revinfo_getter(repo, match):
    """returns a function that returns the following data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object — or, when the rust extension is in
      use, the raw sidedata blob — and None when the revision carries no
      copy information

    ``match`` is currently unused here; it is accepted so callers can pass
    it uniformly.
    """
    # bind the changelog accessors once; the returned closure is called for
    # every relevant revision
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    if rustmod is not None:

        def revinfo(rev):
            # the rust implementation consumes the raw sidedata directly
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
            else:
                raw = None
            return (p1, p2, raw)

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                changes = changelogrevision(rev).changes
            else:
                changes = None
            return (p1, p2, changes)

    return revinfo
224
224
225
225
def cached_is_ancestor(is_ancestor):
    """wrap ``is_ancestor`` in a memoizing layer

    Trivial cases are answered without touching the cache or the wrapped
    callable: a revision is always its own ancestor, and a revision with a
    higher number can never be the ancestor of a lower-numbered one
    (revision numbers are topologically ordered)."""
    memo = {}

    def _is_ancestor(anc, desc):
        if anc == desc:
            return True
        if anc > desc:
            return False
        key = (anc, desc)
        if key not in memo:
            memo[key] = is_ancestor(anc, desc)
        return memo[key]

    return _is_ancestor
242
242
243
243
def _changesetforwardcopies(a, b, match):
    """return a {dst@b: src@a} copy mapping using changeset-centric data

    Copy information is read from the changesets (or their sidedata)
    instead of being traced through filelogs; see usechangesetcentricalgo().
    """
    if a.rev() in (nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()

    cl = repo.changelog
    isancestor = cl.isancestorrev

    # To track a rename from "A" to "B", we need to gather all parent → child
    # edges that are contained in `::B` but not in `::A`.
    #
    #
    # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
    # ::a`) and also all the "root points", ie the parents of the exclusive
    # set that belong to ::a. These are exactly all the revisions needed to
    # express the parent → child edges we need to combine.
    #
    # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
    # excluding paths that lead to roots that are not ancestors of `a`. We
    # keep this out of the explanation because it is hard enough without this
    # special case..

    parents = cl._uncheckedparentrevs
    # parent pair of a revision attached to nothing (both parents null)
    graph_roots = (nullrev, nullrev)

    ancestors = cl.ancestors([a.rev()], inclusive=True)
    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    roots = set()
    has_graph_roots = False
    multi_thread = repo.ui.configbool(b'devel', b'copy-tracing.multi-thread')

    # iterate over `only(B, A)`
    for r in revs:
        ps = parents(r)
        if ps == graph_roots:
            has_graph_roots = True
        else:
            p1, p2 = ps

            # find all the "root points" (see larger comment above)
            if p1 != nullrev and p1 in ancestors:
                roots.add(p1)
            if p2 != nullrev and p2 in ancestors:
                roots.add(p2)
    if not roots:
        # no common revision to track copies from
        return {}
    if has_graph_roots:
        # this deals with the special case mentioned in the [1] footnote. We
        # must filter out revisions that lead to non-common graphroots.
        roots = list(roots)
        m = min(roots)
        h = [b.rev()]
        roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
        roots_to_head = set(roots_to_head)
        revs = [r for r in revs if r in roots_to_head]

    if repo.filecopiesmode == b'changeset-sidedata':
        # When using side-data, we will process the edges "from" the children.
        # We iterate over the children, gathering previously collected data
        # for the parents. To know when the parents data is no longer
        # necessary, we keep a counter of how many children each revision has.
        #
        # An interesting property of `children_count` is that it only contains
        # revisions that will be relevant for an edge of the graph. So if a
        # child has a parent not in `children_count`, that edge should not be
        # processed.
        children_count = dict((r, 0) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children_count[r] = 0
                if p in children_count:
                    children_count[p] += 1
        revinfo = _revinfo_getter(repo, match)
        # keep the changelog open for the whole combination: the revinfo
        # callback reads changelog revisions
        with repo.changelog.reading():
            return _combine_changeset_copies(
                revs,
                children_count,
                b.rev(),
                revinfo,
                match,
                isancestor,
                multi_thread,
            )
    else:
        # When not using side-data, we will process the edges "from" the
        # parent, so we need a full mapping of the parent -> children relation.
        children = dict((r, []) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children[r] = []
                if p in children:
                    children[p].append(r)
        x = revs.pop()
        assert x == b.rev()
        revs.extend(roots)
        revs.sort()

        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )
350
350
351
351
def _combine_changeset_copies(
    revs, children_count, targetrev, revinfo, match, isancestor, multi_thread
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children_count: a {parent: <number-of-relevant-children>} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that returns (p1, p2, changes) for a revision
        (see _revinfo_getter)
    match: a matcher
    isancestor(low_rev, high_rev): callable returning True when `low_rev`
        is an ancestor of `high_rev`; used to resolve conflicting copy info
    multi_thread: whether the rust implementation may use several threads

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    if rustmod is not None:
        # delegate the whole combination to the rust implementation
        final_copies = rustmod.combine_changeset_copies(
            list(revs), children_count, targetrev, revinfo, multi_thread
        )
    else:
        isancestor = cached_is_ancestor(isancestor)

        # {rev: {dest: (introducing_rev, source-or-None)}} for revisions
        # whose data is still needed by unprocessed children
        all_copies = {}
        # iterate over all the "children" side of copy tracing "edge"
        for current_rev in revs:
            p1, p2, changes = revinfo(current_rev)
            current_copies = None
            # iterate over all parents to chain the existing data with the
            # data from the parent → child edge.
            for parent, parent_rev in ((1, p1), (2, p2)):
                if parent_rev == nullrev:
                    continue
                remaining_children = children_count.get(parent_rev)
                if remaining_children is None:
                    # this edge is not relevant to the target revision
                    continue
                remaining_children -= 1
                children_count[parent_rev] = remaining_children
                if remaining_children:
                    copies = all_copies.get(parent_rev, None)
                else:
                    # last child of this parent: the parent's data can be
                    # dropped from `all_copies` (and mutated freely)
                    copies = all_copies.pop(parent_rev, None)

                if copies is None:
                    # this is a root
                    newcopies = copies = {}
                elif remaining_children:
                    # other children still need `copies`: work on a copy
                    newcopies = copies.copy()
                else:
                    newcopies = copies
                # chain the data in the edge with the existing data
                if changes is not None:
                    childcopies = {}
                    if parent == 1:
                        childcopies = changes.copied_from_p1
                    elif parent == 2:
                        childcopies = changes.copied_from_p2

                    if childcopies:
                        newcopies = copies.copy()
                        for dest, source in pycompat.iteritems(childcopies):
                            prev = copies.get(source)
                            if prev is not None and prev[1] is not None:
                                # collapse chained copies to their original
                                # source
                                source = prev[1]
                            newcopies[dest] = (current_rev, source)
                        assert newcopies is not copies
                    if changes.removed:
                        for f in changes.removed:
                            if f in newcopies:
                                if newcopies is copies:
                                    # copy on write to avoid affecting potential other
                                    # branches. when there are no other branches, this
                                    # could be avoided.
                                    newcopies = copies.copy()
                                newcopies[f] = (current_rev, None)
                # check potential need to combine the data from another parent (for
                # that child). See comment below for details.
                if current_copies is None:
                    current_copies = newcopies
                else:
                    # we are the second parent to work on c, we need to merge our
                    # work with the other.
                    #
                    # In case of conflict, parent 1 takes precedence over parent 2.
                    # This is an arbitrary choice made anew when implementing
                    # changeset based copies. It was made without regards with
                    # potential filelog related behavior.
                    assert parent == 2
                    current_copies = _merge_copies_dict(
                        newcopies,
                        current_copies,
                        isancestor,
                        changes,
                        current_rev,
                    )
            all_copies[current_rev] = current_copies

        # filter out internal details and return a {dest: source mapping}
        final_copies = {}
        for dest, (tt, source) in all_copies[targetrev].items():
            if source is not None:
                final_copies[dest] = source
    if not alwaysmatch:
        for filename in list(final_copies.keys()):
            if not match(filename):
                del final_copies[filename]
    return final_copies
459
459
460
460
# constant to decide which side to pick with _merge_copies_dict
# (returned by _compare_values as the first element of its result)
PICK_MINOR = 0
PICK_MAJOR = 1
PICK_EITHER = 2
465
465
466
466
def _merge_copies_dict(minor, major, isancestor, changes, current_merge):
    """merge two copies-mapping together, minor and major

    In case of conflict, value from "major" will be picked.

    - `isancestor(low_rev, high_rev)`: callable returning True if `low_rev`
      is an ancestor of `high_rev`,

    - `changes`: the ChangingFiles data for the merge being processed
      (may be None), forwarded to _compare_values,

    - `current_merge`: the revision number of the merge currently being
      processed; conflicting entries that needed resolution are re-stamped
      with it,

    return the resulting dict (in practice, the "minor" object, updated)
    """
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            # no conflict: simply adopt the value from "major"
            minor[dest] = value
        else:
            # both sides know about `dest`: decide which value wins and
            # whether the winner must be re-stamped with the merge revision
            pick, overwrite = _compare_values(
                changes, isancestor, dest, other, value
            )
            if overwrite:
                if pick == PICK_MAJOR:
                    minor[dest] = (current_merge, value[1])
                else:
                    minor[dest] = (current_merge, other[1])
            elif pick == PICK_MAJOR:
                minor[dest] = value
    return minor
496
496
497
497
def _compare_values(changes, isancestor, dest, minor, major):
    """compare two values within a _merge_copies_dict loop iteration

    return (pick, overwrite).

    - pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER
    - overwrite is True if pick is the result of an ambiguity that needs resolution.
    """
    # each value is a (revision-of-record, copy-source-or-None) pair;
    # a None copy source marks the destination as deleted on that side
    major_tt, major_value = major
    minor_tt, minor_value = minor

    if major_tt == minor_tt:
        # if it comes from the same revision it must be the same value
        assert major_value == minor_value
        return PICK_EITHER, False
    elif (
        changes is not None
        and minor_value is not None
        and major_value is None
        and dest in changes.salvaged
    ):
        # In this case, a deletion was reverted, the "alive" value overwrite
        # the deleted one.
        return PICK_MINOR, True
    elif (
        changes is not None
        and major_value is not None
        and minor_value is None
        and dest in changes.salvaged
    ):
        # In this case, a deletion was reverted, the "alive" value overwrite
        # the deleted one.
        return PICK_MAJOR, True
    elif isancestor(minor_tt, major_tt):
        if changes is not None and dest in changes.merged:
            # change to dest happened on the branch without copy-source change,
            # so both source are valid and "major" wins.
            return PICK_MAJOR, True
        else:
            return PICK_MAJOR, False
    elif isancestor(major_tt, minor_tt):
        if changes is not None and dest in changes.merged:
            # change to dest happened on the branch without copy-source change,
            # so both source are valid and "major" wins.
            return PICK_MAJOR, True
        else:
            return PICK_MINOR, False
    elif minor_value is None:
        # in case of conflict, the "alive" side wins.
        return PICK_MAJOR, True
    elif major_value is None:
        # in case of conflict, the "alive" side wins.
        return PICK_MINOR, True
    else:
        # in case of conflict where both side are alive, major wins.
        return PICK_MAJOR, True
554
554
555
555
556 def _revinfo_getter_extra(repo):
556 def _revinfo_getter_extra(repo):
557 """return a function that return multiple data given a <rev>"i
557 """return a function that return multiple data given a <rev>"i
558
558
559 * p1: revision number of first parent
559 * p1: revision number of first parent
560 * p2: revision number of first parent
560 * p2: revision number of first parent
561 * p1copies: mapping of copies from p1
561 * p1copies: mapping of copies from p1
562 * p2copies: mapping of copies from p2
562 * p2copies: mapping of copies from p2
563 * removed: a list of removed files
563 * removed: a list of removed files
564 * ismerged: a callback to know if file was merged in that revision
564 * ismerged: a callback to know if file was merged in that revision
565 """
565 """
566 cl = repo.changelog
566 cl = repo.changelog
567 parents = cl.parentrevs
567 parents = cl.parentrevs
568
568
569 def get_ismerged(rev):
569 def get_ismerged(rev):
570 ctx = repo[rev]
570 ctx = repo[rev]
571
571
572 def ismerged(path):
572 def ismerged(path):
573 if path not in ctx.files():
573 if path not in ctx.files():
574 return False
574 return False
575 fctx = ctx[path]
575 fctx = ctx[path]
576 parents = fctx._filelog.parents(fctx._filenode)
576 parents = fctx._filelog.parents(fctx._filenode)
577 nb_parents = 0
577 nb_parents = 0
578 for n in parents:
578 for n in parents:
579 if n != repo.nullid:
579 if n != repo.nullid:
580 nb_parents += 1
580 nb_parents += 1
581 return nb_parents >= 2
581 return nb_parents >= 2
582
582
583 return ismerged
583 return ismerged
584
584
585 def revinfo(rev):
585 def revinfo(rev):
586 p1, p2 = parents(rev)
586 p1, p2 = parents(rev)
587 ctx = repo[rev]
587 ctx = repo[rev]
588 p1copies, p2copies = ctx._copies
588 p1copies, p2copies = ctx._copies
589 removed = ctx.filesremoved()
589 removed = ctx.filesremoved()
590 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
590 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
591
591
592 return revinfo
592 return revinfo
593
593
594
594
def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information"""
    # all_copies maps revision -> {dest: (introducing-rev, source-or-None)},
    # the copy information accumulated from the roots down to that revision
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            # pick the copies recorded against the parent we are walking from
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    # chain through an earlier rename of the source, if any
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    # a None source marks the copy as no longer alive
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                all_copies[c] = newcopies

    # keep only the copies still alive (source is not None) at the target
    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
663
663
664
664
665 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
665 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
666 """version of `_merge_copies_dict` that works with the Google
666 """version of `_merge_copies_dict` that works with the Google
667 specific "extra" based storage for copy information"""
667 specific "extra" based storage for copy information"""
668 for dest, value in major.items():
668 for dest, value in major.items():
669 other = minor.get(dest)
669 other = minor.get(dest)
670 if other is None:
670 if other is None:
671 minor[dest] = value
671 minor[dest] = value
672 else:
672 else:
673 new_tt = value[0]
673 new_tt = value[0]
674 other_tt = other[0]
674 other_tt = other[0]
675 if value[1] == other[1]:
675 if value[1] == other[1]:
676 continue
676 continue
677 # content from "major" wins, unless it is older
677 # content from "major" wins, unless it is older
678 # than the branch point or there is a merge
678 # than the branch point or there is a merge
679 if (
679 if (
680 new_tt == other_tt
680 new_tt == other_tt
681 or not isancestor(new_tt, other_tt)
681 or not isancestor(new_tt, other_tt)
682 or ismerged(dest)
682 or ismerged(dest)
683 ):
683 ):
684 minor[dest] = value
684 minor[dest] = value
685
685
686
686
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # a None revision identifies the working copy
    if b.rev() is not None:
        return _committedforwardcopies(a, b, base, match)
    # for the working copy: compute copies up to its first parent, then
    # layer the dirstate-recorded copies on top of them
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
701
701
702
702
def _backwardrenames(a, b, match):
    """find renames from a to b"""
    copytrace = a._repo.ui.config(b'experimental', b'copytrace')
    if copytrace == b'off':
        return {}

    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    return _reverse_renames(forward, a, match)
713
713
714
714
def _reverse_renames(copies, dst, match):
    """given copies to context 'dst', finds renames from that context"""
    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    renames = {}
    for dest, source in sorted(pycompat.iteritems(copies)):
        if match and not match(source):
            continue
        if source in dst:
            # the source still exists in 'dst': that was a copy, not a rename
            continue
        renames[source] = dest
    return renames
729
729
730
730
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        # y descends from x: walk copies forward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        # x descends from y: walk copies backward (reversed renames)
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # x and y are on separate branches: combine through their ancestor
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != nullrev:
            base = x
        x_copies = _forwardcopies(a, x)
        y_copies = _forwardcopies(a, y, base, match=match)
        # drop copies recorded identically on both sides; they would cancel out
        same_keys = set(x_copies) & set(y_copies)
        for k in same_keys:
            if x_copies.get(k) == y_copies.get(k):
                del x_copies[k]
                del y_copies[k]
        x_backward_renames = _reverse_renames(x_copies, x, match)
        copies = _chain(
            x_backward_renames,
            y_copies,
        )
    _filter(x, y, copies)
    return copies
775
775
776
776
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
845
845
846
846
847 def _isfullcopytraceable(repo, c1, base):
847 def _isfullcopytraceable(repo, c1, base):
848 """Checks that if base, source and destination are all no-public branches,
848 """Checks that if base, source and destination are all no-public branches,
849 if yes let's use the full copytrace algorithm for increased capabilities
849 if yes let's use the full copytrace algorithm for increased capabilities
850 since it will be fast enough.
850 since it will be fast enough.
851
851
852 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
852 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
853 number of changesets from c1 to base such that if number of changesets are
853 number of changesets from c1 to base such that if number of changesets are
854 more than the limit, full copytracing algorithm won't be used.
854 more than the limit, full copytracing algorithm won't be used.
855 """
855 """
856 if c1.rev() is None:
856 if c1.rev() is None:
857 c1 = c1.p1()
857 c1 = c1.p1()
858 if c1.mutable() and base.mutable():
858 if c1.mutable() and base.mutable():
859 sourcecommitlimit = repo.ui.configint(
859 sourcecommitlimit = repo.ui.configint(
860 b'experimental', b'copytrace.sourcecommitlimit'
860 b'experimental', b'copytrace.sourcecommitlimit'
861 )
861 )
862 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
862 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
863 return commits < sourcecommitlimit
863 return commits < sourcecommitlimit
864 return False
864 return False
865
865
866
866
867 def _checksinglesidecopies(
867 def _checksinglesidecopies(
868 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
868 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
869 ):
869 ):
870 if src not in m2:
870 if src not in m2:
871 # deleted on side 2
871 # deleted on side 2
872 if src not in m1:
872 if src not in m1:
873 # renamed on side 1, deleted on side 2
873 # renamed on side 1, deleted on side 2
874 renamedelete[src] = dsts1
874 renamedelete[src] = dsts1
875 elif src not in mb:
875 elif src not in mb:
876 # Work around the "short-circuit to avoid issues with merge states"
876 # Work around the "short-circuit to avoid issues with merge states"
877 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
877 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
878 # destination doesn't exist in y.
878 # destination doesn't exist in y.
879 pass
879 pass
880 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
880 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
881 return
881 return
882 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
882 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
883 # modified on side 2
883 # modified on side 2
884 for dst in dsts1:
884 for dst in dsts1:
885 copy[dst] = src
885 copy[dst] = src
886
886
887
887
class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # each mapping defaults to a fresh, independent dict when omitted
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
922
922
923
923
924 def _fullcopytracing(repo, c1, c2, base):
924 def _fullcopytracing(repo, c1, c2, base):
925 """The full copytracing algorithm which finds all the new files that were
925 """The full copytracing algorithm which finds all the new files that were
926 added from merge base up to the top commit and for each file it checks if
926 added from merge base up to the top commit and for each file it checks if
927 this file was copied from another file.
927 this file was copied from another file.
928
928
929 This is pretty slow when a lot of changesets are involved but will track all
929 This is pretty slow when a lot of changesets are involved but will track all
930 the copies.
930 the copies.
931 """
931 """
932 m1 = c1.manifest()
932 m1 = c1.manifest()
933 m2 = c2.manifest()
933 m2 = c2.manifest()
934 mb = base.manifest()
934 mb = base.manifest()
935
935
936 copies1 = pathcopies(base, c1)
936 copies1 = pathcopies(base, c1)
937 copies2 = pathcopies(base, c2)
937 copies2 = pathcopies(base, c2)
938
938
939 if not (copies1 or copies2):
939 if not (copies1 or copies2):
940 return branch_copies(), branch_copies(), {}
940 return branch_copies(), branch_copies(), {}
941
941
942 inversecopies1 = {}
942 inversecopies1 = {}
943 inversecopies2 = {}
943 inversecopies2 = {}
944 for dst, src in copies1.items():
944 for dst, src in copies1.items():
945 inversecopies1.setdefault(src, []).append(dst)
945 inversecopies1.setdefault(src, []).append(dst)
946 for dst, src in copies2.items():
946 for dst, src in copies2.items():
947 inversecopies2.setdefault(src, []).append(dst)
947 inversecopies2.setdefault(src, []).append(dst)
948
948
949 copy1 = {}
949 copy1 = {}
950 copy2 = {}
950 copy2 = {}
951 diverge = {}
951 diverge = {}
952 renamedelete1 = {}
952 renamedelete1 = {}
953 renamedelete2 = {}
953 renamedelete2 = {}
954 allsources = set(inversecopies1) | set(inversecopies2)
954 allsources = set(inversecopies1) | set(inversecopies2)
955 for src in allsources:
955 for src in allsources:
956 dsts1 = inversecopies1.get(src)
956 dsts1 = inversecopies1.get(src)
957 dsts2 = inversecopies2.get(src)
957 dsts2 = inversecopies2.get(src)
958 if dsts1 and dsts2:
958 if dsts1 and dsts2:
959 # copied/renamed on both sides
959 # copied/renamed on both sides
960 if src not in m1 and src not in m2:
960 if src not in m1 and src not in m2:
961 # renamed on both sides
961 # renamed on both sides
962 dsts1 = set(dsts1)
962 dsts1 = set(dsts1)
963 dsts2 = set(dsts2)
963 dsts2 = set(dsts2)
964 # If there's some overlap in the rename destinations, we
964 # If there's some overlap in the rename destinations, we
965 # consider it not divergent. For example, if side 1 copies 'a'
965 # consider it not divergent. For example, if side 1 copies 'a'
966 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
966 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
967 # and 'd' and deletes 'a'.
967 # and 'd' and deletes 'a'.
968 if dsts1 & dsts2:
968 if dsts1 & dsts2:
969 for dst in dsts1 & dsts2:
969 for dst in dsts1 & dsts2:
970 copy1[dst] = src
970 copy1[dst] = src
971 copy2[dst] = src
971 copy2[dst] = src
972 else:
972 else:
973 diverge[src] = sorted(dsts1 | dsts2)
973 diverge[src] = sorted(dsts1 | dsts2)
974 elif src in m1 and src in m2:
974 elif src in m1 and src in m2:
975 # copied on both sides
975 # copied on both sides
976 dsts1 = set(dsts1)
976 dsts1 = set(dsts1)
977 dsts2 = set(dsts2)
977 dsts2 = set(dsts2)
978 for dst in dsts1 & dsts2:
978 for dst in dsts1 & dsts2:
979 copy1[dst] = src
979 copy1[dst] = src
980 copy2[dst] = src
980 copy2[dst] = src
981 # TODO: Handle cases where it was renamed on one side and copied
981 # TODO: Handle cases where it was renamed on one side and copied
982 # on the other side
982 # on the other side
983 elif dsts1:
983 elif dsts1:
984 # copied/renamed only on side 1
984 # copied/renamed only on side 1
985 _checksinglesidecopies(
985 _checksinglesidecopies(
986 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
986 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
987 )
987 )
988 elif dsts2:
988 elif dsts2:
989 # copied/renamed only on side 2
989 # copied/renamed only on side 2
990 _checksinglesidecopies(
990 _checksinglesidecopies(
991 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
991 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
992 )
992 )
993
993
994 # find interesting file sets from manifests
994 # find interesting file sets from manifests
995 cache = []
995 cache = []
996
996
997 def _get_addedfiles(idx):
997 def _get_addedfiles(idx):
998 if not cache:
998 if not cache:
999 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
999 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
1000 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
1000 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
1001 u1 = sorted(addedinm1 - addedinm2)
1001 u1 = sorted(addedinm1 - addedinm2)
1002 u2 = sorted(addedinm2 - addedinm1)
1002 u2 = sorted(addedinm2 - addedinm1)
1003 cache.extend((u1, u2))
1003 cache.extend((u1, u2))
1004 return cache[idx]
1004 return cache[idx]
1005
1005
1006 u1fn = lambda: _get_addedfiles(0)
1006 u1fn = lambda: _get_addedfiles(0)
1007 u2fn = lambda: _get_addedfiles(1)
1007 u2fn = lambda: _get_addedfiles(1)
1008 if repo.ui.debugflag:
1008 if repo.ui.debugflag:
1009 u1 = u1fn()
1009 u1 = u1fn()
1010 u2 = u2fn()
1010 u2 = u2fn()
1011
1011
1012 header = b" unmatched files in %s"
1012 header = b" unmatched files in %s"
1013 if u1:
1013 if u1:
1014 repo.ui.debug(
1014 repo.ui.debug(
1015 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
1015 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
1016 )
1016 )
1017 if u2:
1017 if u2:
1018 repo.ui.debug(
1018 repo.ui.debug(
1019 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
1019 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
1020 )
1020 )
1021
1021
1022 renamedeleteset = set()
1022 renamedeleteset = set()
1023 divergeset = set()
1023 divergeset = set()
1024 for dsts in diverge.values():
1024 for dsts in diverge.values():
1025 divergeset.update(dsts)
1025 divergeset.update(dsts)
1026 for dsts in renamedelete1.values():
1026 for dsts in renamedelete1.values():
1027 renamedeleteset.update(dsts)
1027 renamedeleteset.update(dsts)
1028 for dsts in renamedelete2.values():
1028 for dsts in renamedelete2.values():
1029 renamedeleteset.update(dsts)
1029 renamedeleteset.update(dsts)
1030
1030
1031 repo.ui.debug(
1031 repo.ui.debug(
1032 b" all copies found (* = to merge, ! = divergent, "
1032 b" all copies found (* = to merge, ! = divergent, "
1033 b"% = renamed and deleted):\n"
1033 b"% = renamed and deleted):\n"
1034 )
1034 )
1035 for side, copies in ((b"local", copies1), (b"remote", copies2)):
1035 for side, copies in ((b"local", copies1), (b"remote", copies2)):
1036 if not copies:
1036 if not copies:
1037 continue
1037 continue
1038 repo.ui.debug(b" on %s side:\n" % side)
1038 repo.ui.debug(b" on %s side:\n" % side)
1039 for f in sorted(copies):
1039 for f in sorted(copies):
1040 note = b""
1040 note = b""
1041 if f in copy1 or f in copy2:
1041 if f in copy1 or f in copy2:
1042 note += b"*"
1042 note += b"*"
1043 if f in divergeset:
1043 if f in divergeset:
1044 note += b"!"
1044 note += b"!"
1045 if f in renamedeleteset:
1045 if f in renamedeleteset:
1046 note += b"%"
1046 note += b"%"
1047 repo.ui.debug(
1047 repo.ui.debug(
1048 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
1048 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
1049 )
1049 )
1050 del renamedeleteset
1050 del renamedeleteset
1051 del divergeset
1051 del divergeset
1052
1052
1053 repo.ui.debug(b" checking for directory renames\n")
1053 repo.ui.debug(b" checking for directory renames\n")
1054
1054
1055 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
1055 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
1056 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
1056 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
1057
1057
1058 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
1058 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
1059 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
1059 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
1060
1060
1061 return branch_copies1, branch_copies2, diverge
1061 return branch_copies1, branch_copies2, diverge
1062
1062
1063
1063
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfilesfn: function returning added files on the other side (compared to
    ctx)

    Returns a (dirmove, movewithdir) pair:

    dirmove: moved source directory -> destination directory, both keys and
    values carrying a trailing "/"
    movewithdir: file added on the other side -> path it should take because
    its containing directory was moved
    """
    # generate a directory move map
    invalid = set()  # source dirs proven NOT to be clean whole-directory moves
    dirmove = {}  # candidate source dir -> destination dir

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    # a directory may have been recorded as a candidate before being
    # invalidated by a later file; drop those candidates now
    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del invalid

    if not dirmove:
        return {}, {}

    # add trailing slashes so the startswith() matching below can't pair a
    # file with a sibling directory that merely shares a name prefix
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    # Sort the directories in reverse order, so we find children first
    # For example, if dir1/ was renamed to dir2/, and dir1/subdir1/
    # was renamed to dir2/subdir2/, we want to move dir1/subdir1/file
    # to dir2/subdir2/file (not dir2/subdir1/file)
    dirmove_children_first = sorted(dirmove, reverse=True)

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfilesfn():
        if f not in fullcopy:
            for d in dirmove_children_first:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    # deepest matching directory wins; stop searching
                    break

    return dirmove, movewithdir
1133
1133
1134
1134
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If a merge is involved it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same (branch_copies, branch_copies, diverge) triple as the
    full algorithm.
    """

    # working-copy contexts have rev() None; substitute their first parent
    # so the revset queries below can use integer revisions
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base and c2, bailing out to the
    # full algorithm if any commit on that path is a merge
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only the base->c2 copies whose source still exists on the c1 side
    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    copies1 = {}
    if missingfiles:
        # index the files added on the c1 side by basename and by dirname,
        # matching the two rename shapes described in the docstring
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidate moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1242
1242
1243
1243
1244 def _related(f1, f2):
1244 def _related(f1, f2):
1245 """return True if f1 and f2 filectx have a common ancestor
1245 """return True if f1 and f2 filectx have a common ancestor
1246
1246
1247 Walk back to common ancestor to see if the two files originate
1247 Walk back to common ancestor to see if the two files originate
1248 from the same file. Since workingfilectx's rev() is None it messes
1248 from the same file. Since workingfilectx's rev() is None it messes
1249 up the integer comparison logic, hence the pre-step check for
1249 up the integer comparison logic, hence the pre-step check for
1250 None (f1 and f2 can only be workingfilectx's initially).
1250 None (f1 and f2 can only be workingfilectx's initially).
1251 """
1251 """
1252
1252
1253 if f1 == f2:
1253 if f1 == f2:
1254 return True # a match
1254 return True # a match
1255
1255
1256 g1, g2 = f1.ancestors(), f2.ancestors()
1256 g1, g2 = f1.ancestors(), f2.ancestors()
1257 try:
1257 try:
1258 f1r, f2r = f1.linkrev(), f2.linkrev()
1258 f1r, f2r = f1.linkrev(), f2.linkrev()
1259
1259
1260 if f1r is None:
1260 if f1r is None:
1261 f1 = next(g1)
1261 f1 = next(g1)
1262 if f2r is None:
1262 if f2r is None:
1263 f2 = next(g2)
1263 f2 = next(g2)
1264
1264
1265 while True:
1265 while True:
1266 f1r, f2r = f1.linkrev(), f2.linkrev()
1266 f1r, f2r = f1.linkrev(), f2.linkrev()
1267 if f1r > f2r:
1267 if f1r > f2r:
1268 f1 = next(g1)
1268 f1 = next(g1)
1269 elif f2r > f1r:
1269 elif f2r > f1r:
1270 f2 = next(g2)
1270 f2 = next(g2)
1271 else: # f1 and f2 point to files in the same linkrev
1271 else: # f1 and f2 point to files in the same linkrev
1272 return f1 == f2 # true if they point to the same file
1272 return f1 == f2 # true if they point to the same file
1273 except StopIteration:
1273 except StopIteration:
1274 return False
1274 return False
1275
1275
1276
1276
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), only the copies made between base and ctx are
    considered here; copies between base and wctx are ignored. Also unlike
    mergecopies(), the copies are applied directly to the working copy
    rather than merely reported. That keeps the operation cheap (especially
    in the common base==ctx.p1() case) and usable even when
    experimental.copytrace=off.

    merge.update() will already have marked most copies, but it only marks
    a copy when it believes the source files are related (see
    merge._related()) and when the file was modified on the local side;
    this function records the copies that merge.update() "missed".
    """
    copies_to_apply = pathcopies(base, ctx)
    p1ctx = wctx.p1()
    _filter(p1ctx, wctx, copies_to_apply)
    # Also drop copy information for destinations that already existed
    # before the graft.  This handles grafting a rename onto a commit that
    # already contains the rename; keeping the copy metadata would lead to
    # the creation of an empty commit where we would prefer not to create
    # one.
    preexisting = [dest for dest in copies_to_apply if dest in p1ctx]
    for dest in preexisting:
        del copies_to_apply[dest]
    for dest, source in pycompat.iteritems(copies_to_apply):
        wctx[dest].markcopied(source)
General Comments 0
You need to be logged in to leave comments. Login now