##// END OF EJS Templates
copies: extract function _backwardcopies() for reversing renames...
Martin von Zweigbergk -
r47394:eca88f5f default
parent child Browse files
Show More
@@ -1,1294 +1,1300 b''
1 # coding: utf8
1 # coding: utf8
2 # copies.py - copy detection for Mercurial
2 # copies.py - copy detection for Mercurial
3 #
3 #
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import collections
11 import collections
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15 from .node import (
15 from .node import (
16 nullid,
16 nullid,
17 nullrev,
17 nullrev,
18 )
18 )
19
19
20 from . import (
20 from . import (
21 match as matchmod,
21 match as matchmod,
22 pathutil,
22 pathutil,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28
28
29 from .utils import stringutil
29 from .utils import stringutil
30
30
31 from .revlogutils import (
31 from .revlogutils import (
32 flagutil,
32 flagutil,
33 sidedata as sidedatamod,
33 sidedata as sidedatamod,
34 )
34 )
35
35
36 rustmod = policy.importrust("copy_tracing")
36 rustmod = policy.importrust("copy_tracing")
37
37
38
38
39 def _filter(src, dst, t):
39 def _filter(src, dst, t):
40 """filters out invalid copies after chaining"""
40 """filters out invalid copies after chaining"""
41
41
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
44 # in the following table (not including trivial cases). For example, case 6
44 # in the following table (not including trivial cases). For example, case 6
45 # is where a file existed in 'src' and remained under that name in 'mid' and
45 # is where a file existed in 'src' and remained under that name in 'mid' and
46 # then was renamed between 'mid' and 'dst'.
46 # then was renamed between 'mid' and 'dst'.
47 #
47 #
48 # case src mid dst result
48 # case src mid dst result
49 # 1 x y - -
49 # 1 x y - -
50 # 2 x y y x->y
50 # 2 x y y x->y
51 # 3 x y x -
51 # 3 x y x -
52 # 4 x y z x->z
52 # 4 x y z x->z
53 # 5 - x y -
53 # 5 - x y -
54 # 6 x x y x->y
54 # 6 x x y x->y
55 #
55 #
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
58 # between 5 and 6, so it includes all cases in its result.
58 # between 5 and 6, so it includes all cases in its result.
59 # Cases 1, 3, and 5 are then removed by _filter().
59 # Cases 1, 3, and 5 are then removed by _filter().
60
60
61 for k, v in list(t.items()):
61 for k, v in list(t.items()):
62 if k == v: # case 3
62 if k == v: # case 3
63 del t[k]
63 del t[k]
64 elif v not in src: # case 5
64 elif v not in src: # case 5
65 # remove copies from files that didn't exist
65 # remove copies from files that didn't exist
66 del t[k]
66 del t[k]
67 elif k not in dst: # case 1
67 elif k not in dst: # case 1
68 # remove copies to files that were then removed
68 # remove copies to files that were then removed
69 del t[k]
69 del t[k]
70
70
71
71
def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    chained = prefix.copy()
    for dest, intermediate in pycompat.iteritems(suffix):
        # if the intermediate name was itself copied from somewhere in
        # 'prefix', collapse the two steps into a single copy record
        chained[dest] = prefix.get(intermediate, intermediate)
    return chained
78
78
79
79
80 def _tracefile(fctx, am, basemf):
80 def _tracefile(fctx, am, basemf):
81 """return file context that is the ancestor of fctx present in ancestor
81 """return file context that is the ancestor of fctx present in ancestor
82 manifest am
82 manifest am
83
83
84 Note: we used to try and stop after a given limit, however checking if that
84 Note: we used to try and stop after a given limit, however checking if that
85 limit is reached turned out to be very expensive. we are better off
85 limit is reached turned out to be very expensive. we are better off
86 disabling that feature."""
86 disabling that feature."""
87
87
88 for f in fctx.ancestors():
88 for f in fctx.ancestors():
89 path = f.path()
89 path = f.path()
90 if am.get(path, None) == f.filenode():
90 if am.get(path, None) == f.filenode():
91 return path
91 return path
92 if basemf and basemf.get(path, None) == f.filenode():
92 if basemf and basemf.get(path, None) == f.filenode():
93 return path
93 return path
94
94
95
95
96 def _dirstatecopies(repo, match=None):
96 def _dirstatecopies(repo, match=None):
97 ds = repo.dirstate
97 ds = repo.dirstate
98 c = ds.copies().copy()
98 c = ds.copies().copy()
99 for k in list(c):
99 for k in list(c):
100 if ds[k] not in b'anm' or (match and not match(k)):
100 if ds[k] not in b'anm' or (match and not match(k)):
101 del c[k]
101 del c[k]
102 return c
102 return c
103
103
104
104
105 def _computeforwardmissing(a, b, match=None):
105 def _computeforwardmissing(a, b, match=None):
106 """Computes which files are in b but not a.
106 """Computes which files are in b but not a.
107 This is its own function so extensions can easily wrap this call to see what
107 This is its own function so extensions can easily wrap this call to see what
108 files _forwardcopies is about to process.
108 files _forwardcopies is about to process.
109 """
109 """
110 ma = a.manifest()
110 ma = a.manifest()
111 mb = b.manifest()
111 mb = b.manifest()
112 return mb.filesnotin(ma, match=match)
112 return mb.filesnotin(ma, match=match)
113
113
114
114
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage implies changeset-centric data, no need to look
    # at the config in that case
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
122
122
123
123
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        # copy data is read from the changeset itself, not from filelogs
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}  # result: destination path -> source path

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    if repo.ui.configbool(b'devel', b'copy-tracing.trace-all-files'):
        # devel knob: trace every matched file in `b`, not only the ones
        # missing from `a`
        missing = list(b.walk(match))
        # _computeforwardmissing(a, b, match=forwardmissingmatch)
        if debug:
            dbg(b'debug.copies: searching all files: %d\n' % len(missing))
    else:
        missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
        if debug:
            dbg(
                b'debug.copies: missing files to search: %d\n'
                % len(missing)
            )

    # shared across all traced files so ancestry is only computed once
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
189
189
190
190
def _revinfo_getter(repo, match):
    """returns a function that returns the following data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object (or the raw sidedata blob when the
      Rust implementation is in use)
    """
    # NOTE: `match` is currently unused here
    cl = repo.changelog
    # bind hot-path lookups to locals once; revinfo() is called per revision
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    if rustmod is not None:
        # the Rust code consumes the raw sidedata bytes directly, so avoid
        # parsing them into a ChangingFiles object on the Python side

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
            else:
                raw = None
            return (p1, p2, raw)

    else:
        # pure-Python path: hand back a parsed ChangingFiles object

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                changes = changelogrevision(rev).changes
            else:
                changes = None
            return (p1, p2, changes)

    return revinfo
227
227
228
228
def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor"""
    cache = {}

    def _is_ancestor(anc, desc):
        # a strictly higher revision can never be an ancestor of a lower
        # one, and every revision is its own (inclusive) ancestor; neither
        # answer is worth caching
        if anc > desc:
            return False
        if anc == desc:
            return True
        key = (anc, desc)
        cached = cache.get(key)
        if cached is None:
            cached = cache[key] = is_ancestor(anc, desc)
        return cached

    return _is_ancestor
245
245
246
246
def _changesetforwardcopies(a, b, match):
    """compute copies between `a` and `b` using changeset-stored copy data

    Returns a {dest: source} mapping.  `a` must be an ancestor-side revision
    of `b` (the "forward" direction).
    """
    if a.rev() in (nullrev, b.rev()):
        # nothing to trace from null or from a revision to itself
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev

    # To track rename from "A" to B, we need to gather all parent → children
    # edges that are contained in `::B` but not in `::A`.
    #
    #
    # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
    # ::a`) and also all the "roots point", ie the parents of the exclusive set
    # that belong to ::a. These are exactly all the revisions needed to express
    # the parent → children we need to combine.
    #
    # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
    # excluding paths that lead to roots that are not ancestors of `a`. We
    # keep this out of the explanation because it is hard enough without this
    # special case..

    parents = cl._uncheckedparentrevs
    graph_roots = (nullrev, nullrev)

    ancestors = cl.ancestors([a.rev()], inclusive=True)
    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    roots = set()
    has_graph_roots = False
    multi_thread = repo.ui.configbool(b'devel', b'copy-tracing.multi-thread')

    # iterate over `only(B, A)`
    for r in revs:
        ps = parents(r)
        if ps == graph_roots:
            # `r` is a root of the whole graph (both parents are null)
            has_graph_roots = True
        else:
            p1, p2 = ps

            # find all the "root points" (see larger comment above)
            if p1 != nullrev and p1 in ancestors:
                roots.add(p1)
            if p2 != nullrev and p2 in ancestors:
                roots.add(p2)
    if not roots:
        # no common revision to track copies from
        return {}
    if has_graph_roots:
        # this deals with the special case mentioned in the [1] footnote. We
        # must filter out revisions that lead to non-common graphroots.
        roots = list(roots)
        m = min(roots)
        h = [b.rev()]
        roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
        roots_to_head = set(roots_to_head)
        revs = [r for r in revs if r in roots_to_head]

    if repo.filecopiesmode == b'changeset-sidedata':
        # When using side-data, we will process the edges "from" the children.
        # We iterate over the children, gathering previously collected data
        # for the parents. To know when the parents' data is no longer
        # necessary, we keep a counter of how many children each revision has.
        #
        # An interesting property of `children_count` is that it only contains
        # revisions that will be relevant for an edge of the graph. So if a
        # child has a parent not in `children_count`, that edge should not be
        # processed.
        children_count = dict((r, 0) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children_count[r] = 0
                if p in children_count:
                    children_count[p] += 1
        revinfo = _revinfo_getter(repo, match)
        return _combine_changeset_copies(
            revs,
            children_count,
            b.rev(),
            revinfo,
            match,
            isancestor,
            multi_thread,
        )
    else:
        # When not using side-data, we will process the edges "from" the
        # parent, so we need a full mapping of the parent -> children relation.
        children = dict((r, []) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children[r] = []
                if p in children:
                    children[p].append(r)
        x = revs.pop()
        assert x == b.rev()
        revs.extend(roots)
        revs.sort()

        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )
353
353
354
354
def _combine_changeset_copies(
    revs, children_count, targetrev, revinfo, match, isancestor, multi_thread
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children_count: a {parent: <number-of-relevant-children>} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that returns (p1, p2, changes) for a revision
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    if rustmod is not None:
        # fast path: delegate the whole graph walk to the Rust implementation
        final_copies = rustmod.combine_changeset_copies(
            list(revs), children_count, targetrev, revinfo, multi_thread
        )
    else:
        isancestor = cached_is_ancestor(isancestor)

        # revision -> {dest: (rev, source)} data computed so far
        all_copies = {}
        # iterate over all the "children" side of copy tracing "edge"
        for current_rev in revs:
            p1, p2, changes = revinfo(current_rev)
            current_copies = None
            # iterate over all parents to chain the existing data with the
            # data from the parent → child edge.
            for parent, parent_rev in ((1, p1), (2, p2)):
                if parent_rev == nullrev:
                    continue
                remaining_children = children_count.get(parent_rev)
                if remaining_children is None:
                    # parent is not relevant for copy tracing (see
                    # _changesetforwardcopies), skip this edge
                    continue
                remaining_children -= 1
                children_count[parent_rev] = remaining_children
                if remaining_children:
                    copies = all_copies.get(parent_rev, None)
                else:
                    # last child of this parent: its data can be dropped
                    # (and safely mutated in place below)
                    copies = all_copies.pop(parent_rev, None)

                if copies is None:
                    # this is a root
                    newcopies = copies = {}
                elif remaining_children:
                    # another child still needs the parent's data untouched
                    newcopies = copies.copy()
                else:
                    newcopies = copies
                # chain the data in the edge with the existing data
                if changes is not None:
                    childcopies = {}
                    if parent == 1:
                        childcopies = changes.copied_from_p1
                    elif parent == 2:
                        childcopies = changes.copied_from_p2

                    if childcopies:
                        newcopies = copies.copy()
                        for dest, source in pycompat.iteritems(childcopies):
                            prev = copies.get(source)
                            if prev is not None and prev[1] is not None:
                                # transitive rename: collapse to the original
                                # source
                                source = prev[1]
                            newcopies[dest] = (current_rev, source)
                        assert newcopies is not copies
                    if changes.removed:
                        for f in changes.removed:
                            if f in newcopies:
                                if newcopies is copies:
                                    # copy on write to avoid affecting potential other
                                    # branches. when there are no other branches, this
                                    # could be avoided.
                                    newcopies = copies.copy()
                                # a None source records the deletion
                                newcopies[f] = (current_rev, None)
                # check potential need to combine the data from another parent (for
                # that child). See comment below for details.
                if current_copies is None:
                    current_copies = newcopies
                else:
                    # we are the second parent to work on c, we need to merge our
                    # work with the other.
                    #
                    # In case of conflict, parent 1 take precedence over parent 2.
                    # This is an arbitrary choice made anew when implementing
                    # changeset based copies. It was made without regards with
                    # potential filelog related behavior.
                    assert parent == 2
                    current_copies = _merge_copies_dict(
                        newcopies,
                        current_copies,
                        isancestor,
                        changes,
                        current_rev,
                    )
            all_copies[current_rev] = current_copies

        # filter out internal details and return a {dest: source} mapping
        final_copies = {}
        for dest, (tt, source) in all_copies[targetrev].items():
            if source is not None:
                final_copies[dest] = source
    if not alwaysmatch:
        for filename in list(final_copies.keys()):
            if not match(filename):
                del final_copies[filename]
    return final_copies
462
462
463
463
# constants returned by _compare_values() telling _merge_copies_dict which
# side ("minor" or "major") to pick for a conflicting entry
PICK_MINOR = 0
PICK_MAJOR = 1
PICK_EITHER = 2
468
468
469
469
def _merge_copies_dict(minor, major, isancestor, changes, current_merge):
    """merge two copies-mapping together, minor and major

    In case of conflict, value from "major" will be picked.

    - `isancestor(low_rev, high_rev)`: callable returning True if `low_rev`
      is an ancestor of `high_rev`,

    return the resulting dict (in practice, the "minor" object, updated)
    """
    for dest, major_value in major.items():
        minor_value = minor.get(dest)
        if minor_value is None:
            # no competing entry on the minor side: take major's as-is
            minor[dest] = major_value
            continue
        pick, overwrite = _compare_values(
            changes, isancestor, dest, minor_value, major_value
        )
        if overwrite:
            # the ambiguity is resolved by the merge itself: re-stamp the
            # winning entry with the merge revision, keeping its source
            if pick == PICK_MAJOR:
                minor[dest] = (current_merge, major_value[1])
            else:
                minor[dest] = (current_merge, minor_value[1])
        elif pick == PICK_MAJOR:
            minor[dest] = major_value
    return minor
499
499
500
500
501 def _compare_values(changes, isancestor, dest, minor, major):
501 def _compare_values(changes, isancestor, dest, minor, major):
502 """compare two value within a _merge_copies_dict loop iteration
502 """compare two value within a _merge_copies_dict loop iteration
503
503
504 return (pick, overwrite).
504 return (pick, overwrite).
505
505
506 - pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER
506 - pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER
507 - overwrite is True if pick is a return of an ambiguity that needs resolution.
507 - overwrite is True if pick is a return of an ambiguity that needs resolution.
508 """
508 """
509 major_tt, major_value = major
509 major_tt, major_value = major
510 minor_tt, minor_value = minor
510 minor_tt, minor_value = minor
511
511
512 if major_tt == minor_tt:
512 if major_tt == minor_tt:
513 # if it comes from the same revision it must be the same value
513 # if it comes from the same revision it must be the same value
514 assert major_value == minor_value
514 assert major_value == minor_value
515 return PICK_EITHER, False
515 return PICK_EITHER, False
516 elif (
516 elif (
517 changes is not None
517 changes is not None
518 and minor_value is not None
518 and minor_value is not None
519 and major_value is None
519 and major_value is None
520 and dest in changes.salvaged
520 and dest in changes.salvaged
521 ):
521 ):
522 # In this case, a deletion was reverted, the "alive" value overwrite
522 # In this case, a deletion was reverted, the "alive" value overwrite
523 # the deleted one.
523 # the deleted one.
524 return PICK_MINOR, True
524 return PICK_MINOR, True
525 elif (
525 elif (
526 changes is not None
526 changes is not None
527 and major_value is not None
527 and major_value is not None
528 and minor_value is None
528 and minor_value is None
529 and dest in changes.salvaged
529 and dest in changes.salvaged
530 ):
530 ):
531 # In this case, a deletion was reverted, the "alive" value overwrite
531 # In this case, a deletion was reverted, the "alive" value overwrite
532 # the deleted one.
532 # the deleted one.
533 return PICK_MAJOR, True
533 return PICK_MAJOR, True
534 elif isancestor(minor_tt, major_tt):
534 elif isancestor(minor_tt, major_tt):
535 if changes is not None and dest in changes.merged:
535 if changes is not None and dest in changes.merged:
536 # change to dest happened on the branch without copy-source change,
536 # change to dest happened on the branch without copy-source change,
537 # so both source are valid and "major" wins.
537 # so both source are valid and "major" wins.
538 return PICK_MAJOR, True
538 return PICK_MAJOR, True
539 else:
539 else:
540 return PICK_MAJOR, False
540 return PICK_MAJOR, False
541 elif isancestor(major_tt, minor_tt):
541 elif isancestor(major_tt, minor_tt):
542 if changes is not None and dest in changes.merged:
542 if changes is not None and dest in changes.merged:
543 # change to dest happened on the branch without copy-source change,
543 # change to dest happened on the branch without copy-source change,
544 # so both source are valid and "major" wins.
544 # so both source are valid and "major" wins.
545 return PICK_MAJOR, True
545 return PICK_MAJOR, True
546 else:
546 else:
547 return PICK_MINOR, False
547 return PICK_MINOR, False
548 elif minor_value is None:
548 elif minor_value is None:
549 # in case of conflict, the "alive" side wins.
549 # in case of conflict, the "alive" side wins.
550 return PICK_MAJOR, True
550 return PICK_MAJOR, True
551 elif major_value is None:
551 elif major_value is None:
552 # in case of conflict, the "alive" side wins.
552 # in case of conflict, the "alive" side wins.
553 return PICK_MINOR, True
553 return PICK_MINOR, True
554 else:
554 else:
555 # in case of conflict where both side are alive, major wins.
555 # in case of conflict where both side are alive, major wins.
556 return PICK_MAJOR, True
556 return PICK_MAJOR, True
557
557
558
558
559 def _revinfo_getter_extra(repo):
559 def _revinfo_getter_extra(repo):
560 """return a function that return multiple data given a <rev>"i
560 """return a function that return multiple data given a <rev>"i
561
561
562 * p1: revision number of first parent
562 * p1: revision number of first parent
563 * p2: revision number of first parent
563 * p2: revision number of first parent
564 * p1copies: mapping of copies from p1
564 * p1copies: mapping of copies from p1
565 * p2copies: mapping of copies from p2
565 * p2copies: mapping of copies from p2
566 * removed: a list of removed files
566 * removed: a list of removed files
567 * ismerged: a callback to know if file was merged in that revision
567 * ismerged: a callback to know if file was merged in that revision
568 """
568 """
569 cl = repo.changelog
569 cl = repo.changelog
570 parents = cl.parentrevs
570 parents = cl.parentrevs
571
571
572 def get_ismerged(rev):
572 def get_ismerged(rev):
573 ctx = repo[rev]
573 ctx = repo[rev]
574
574
575 def ismerged(path):
575 def ismerged(path):
576 if path not in ctx.files():
576 if path not in ctx.files():
577 return False
577 return False
578 fctx = ctx[path]
578 fctx = ctx[path]
579 parents = fctx._filelog.parents(fctx._filenode)
579 parents = fctx._filelog.parents(fctx._filenode)
580 nb_parents = 0
580 nb_parents = 0
581 for n in parents:
581 for n in parents:
582 if n != nullid:
582 if n != nullid:
583 nb_parents += 1
583 nb_parents += 1
584 return nb_parents >= 2
584 return nb_parents >= 2
585
585
586 return ismerged
586 return ismerged
587
587
588 def revinfo(rev):
588 def revinfo(rev):
589 p1, p2 = parents(rev)
589 p1, p2 = parents(rev)
590 ctx = repo[rev]
590 ctx = repo[rev]
591 p1copies, p2copies = ctx._copies
591 p1copies, p2copies = ctx._copies
592 removed = ctx.filesremoved()
592 removed = ctx.filesremoved()
593 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
593 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
594
594
595 return revinfo
595 return revinfo
596
596
597
597
598 def _combine_changeset_copies_extra(
598 def _combine_changeset_copies_extra(
599 revs, children, targetrev, revinfo, match, isancestor
599 revs, children, targetrev, revinfo, match, isancestor
600 ):
600 ):
601 """version of `_combine_changeset_copies` that works with the Google
601 """version of `_combine_changeset_copies` that works with the Google
602 specific "extra" based storage for copy information"""
602 specific "extra" based storage for copy information"""
603 all_copies = {}
603 all_copies = {}
604 alwaysmatch = match.always()
604 alwaysmatch = match.always()
605 for r in revs:
605 for r in revs:
606 copies = all_copies.pop(r, None)
606 copies = all_copies.pop(r, None)
607 if copies is None:
607 if copies is None:
608 # this is a root
608 # this is a root
609 copies = {}
609 copies = {}
610 for i, c in enumerate(children[r]):
610 for i, c in enumerate(children[r]):
611 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
611 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
612 if r == p1:
612 if r == p1:
613 parent = 1
613 parent = 1
614 childcopies = p1copies
614 childcopies = p1copies
615 else:
615 else:
616 assert r == p2
616 assert r == p2
617 parent = 2
617 parent = 2
618 childcopies = p2copies
618 childcopies = p2copies
619 if not alwaysmatch:
619 if not alwaysmatch:
620 childcopies = {
620 childcopies = {
621 dst: src for dst, src in childcopies.items() if match(dst)
621 dst: src for dst, src in childcopies.items() if match(dst)
622 }
622 }
623 newcopies = copies
623 newcopies = copies
624 if childcopies:
624 if childcopies:
625 newcopies = copies.copy()
625 newcopies = copies.copy()
626 for dest, source in pycompat.iteritems(childcopies):
626 for dest, source in pycompat.iteritems(childcopies):
627 prev = copies.get(source)
627 prev = copies.get(source)
628 if prev is not None and prev[1] is not None:
628 if prev is not None and prev[1] is not None:
629 source = prev[1]
629 source = prev[1]
630 newcopies[dest] = (c, source)
630 newcopies[dest] = (c, source)
631 assert newcopies is not copies
631 assert newcopies is not copies
632 for f in removed:
632 for f in removed:
633 if f in newcopies:
633 if f in newcopies:
634 if newcopies is copies:
634 if newcopies is copies:
635 # copy on write to avoid affecting potential other
635 # copy on write to avoid affecting potential other
636 # branches. when there are no other branches, this
636 # branches. when there are no other branches, this
637 # could be avoided.
637 # could be avoided.
638 newcopies = copies.copy()
638 newcopies = copies.copy()
639 newcopies[f] = (c, None)
639 newcopies[f] = (c, None)
640 othercopies = all_copies.get(c)
640 othercopies = all_copies.get(c)
641 if othercopies is None:
641 if othercopies is None:
642 all_copies[c] = newcopies
642 all_copies[c] = newcopies
643 else:
643 else:
644 # we are the second parent to work on c, we need to merge our
644 # we are the second parent to work on c, we need to merge our
645 # work with the other.
645 # work with the other.
646 #
646 #
647 # In case of conflict, parent 1 take precedence over parent 2.
647 # In case of conflict, parent 1 take precedence over parent 2.
648 # This is an arbitrary choice made anew when implementing
648 # This is an arbitrary choice made anew when implementing
649 # changeset based copies. It was made without regards with
649 # changeset based copies. It was made without regards with
650 # potential filelog related behavior.
650 # potential filelog related behavior.
651 if parent == 1:
651 if parent == 1:
652 _merge_copies_dict_extra(
652 _merge_copies_dict_extra(
653 othercopies, newcopies, isancestor, ismerged
653 othercopies, newcopies, isancestor, ismerged
654 )
654 )
655 else:
655 else:
656 _merge_copies_dict_extra(
656 _merge_copies_dict_extra(
657 newcopies, othercopies, isancestor, ismerged
657 newcopies, othercopies, isancestor, ismerged
658 )
658 )
659 all_copies[c] = newcopies
659 all_copies[c] = newcopies
660
660
661 final_copies = {}
661 final_copies = {}
662 for dest, (tt, source) in all_copies[targetrev].items():
662 for dest, (tt, source) in all_copies[targetrev].items():
663 if source is not None:
663 if source is not None:
664 final_copies[dest] = source
664 final_copies[dest] = source
665 return final_copies
665 return final_copies
666
666
667
667
668 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
668 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
669 """version of `_merge_copies_dict` that works with the Google
669 """version of `_merge_copies_dict` that works with the Google
670 specific "extra" based storage for copy information"""
670 specific "extra" based storage for copy information"""
671 for dest, value in major.items():
671 for dest, value in major.items():
672 other = minor.get(dest)
672 other = minor.get(dest)
673 if other is None:
673 if other is None:
674 minor[dest] = value
674 minor[dest] = value
675 else:
675 else:
676 new_tt = value[0]
676 new_tt = value[0]
677 other_tt = other[0]
677 other_tt = other[0]
678 if value[1] == other[1]:
678 if value[1] == other[1]:
679 continue
679 continue
680 # content from "major" wins, unless it is older
680 # content from "major" wins, unless it is older
681 # than the branch point or there is a merge
681 # than the branch point or there is a merge
682 if (
682 if (
683 new_tt == other_tt
683 new_tt == other_tt
684 or not isancestor(new_tt, other_tt)
684 or not isancestor(new_tt, other_tt)
685 or ismerged(dest)
685 or ismerged(dest)
686 ):
686 ):
687 minor[dest] = value
687 minor[dest] = value
688
688
689
689
690 def _forwardcopies(a, b, base=None, match=None):
690 def _forwardcopies(a, b, base=None, match=None):
691 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
691 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
692
692
693 if base is None:
693 if base is None:
694 base = a
694 base = a
695 match = a.repo().narrowmatch(match)
695 match = a.repo().narrowmatch(match)
696 # check for working copy
696 # check for working copy
697 if b.rev() is None:
697 if b.rev() is None:
698 cm = _committedforwardcopies(a, b.p1(), base, match)
698 cm = _committedforwardcopies(a, b.p1(), base, match)
699 # combine copies from dirstate if necessary
699 # combine copies from dirstate if necessary
700 copies = _chain(cm, _dirstatecopies(b._repo, match))
700 copies = _chain(cm, _dirstatecopies(b._repo, match))
701 else:
701 else:
702 copies = _committedforwardcopies(a, b, base, match)
702 copies = _committedforwardcopies(a, b, base, match)
703 return copies
703 return copies
704
704
705
705
706 def _backwardrenames(a, b, match):
706 def _backwardrenames(a, b, match):
707 """find renames from a to b"""
707 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
708 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
708 return {}
709 return {}
709
710
711 # We don't want to pass in "match" here, since that would filter
712 # the destination by it. Since we're reversing the copies, we want
713 # to filter the source instead.
714 copies = _forwardcopies(b, a)
715 return _reverse_renames(copies, a, match)
716
717
718 def _reverse_renames(copies, dst, match):
719 """given copies to context 'dst', finds renames from that context"""
710 # Even though we're not taking copies into account, 1:n rename situations
720 # Even though we're not taking copies into account, 1:n rename situations
711 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
721 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
712 # arbitrarily pick one of the renames.
722 # arbitrarily pick one of the renames.
713 # We don't want to pass in "match" here, since that would filter
714 # the destination by it. Since we're reversing the copies, we want
715 # to filter the source instead.
716 f = _forwardcopies(b, a)
717 r = {}
723 r = {}
718 for k, v in sorted(pycompat.iteritems(f)):
724 for k, v in sorted(pycompat.iteritems(copies)):
719 if match and not match(v):
725 if match and not match(v):
720 continue
726 continue
721 # remove copies
727 # remove copies
722 if v in a:
728 if v in dst:
723 continue
729 continue
724 r[v] = k
730 r[v] = k
725 return r
731 return r
726
732
727
733
728 def pathcopies(x, y, match=None):
734 def pathcopies(x, y, match=None):
729 """find {dst@y: src@x} copy mapping for directed compare"""
735 """find {dst@y: src@x} copy mapping for directed compare"""
730 repo = x._repo
736 repo = x._repo
731 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
737 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
732 if debug:
738 if debug:
733 repo.ui.debug(
739 repo.ui.debug(
734 b'debug.copies: searching copies from %s to %s\n' % (x, y)
740 b'debug.copies: searching copies from %s to %s\n' % (x, y)
735 )
741 )
736 if x == y or not x or not y:
742 if x == y or not x or not y:
737 return {}
743 return {}
738 if y.rev() is None and x == y.p1():
744 if y.rev() is None and x == y.p1():
739 if debug:
745 if debug:
740 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
746 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
741 # short-circuit to avoid issues with merge states
747 # short-circuit to avoid issues with merge states
742 return _dirstatecopies(repo, match)
748 return _dirstatecopies(repo, match)
743 a = y.ancestor(x)
749 a = y.ancestor(x)
744 if a == x:
750 if a == x:
745 if debug:
751 if debug:
746 repo.ui.debug(b'debug.copies: search mode: forward\n')
752 repo.ui.debug(b'debug.copies: search mode: forward\n')
747 copies = _forwardcopies(x, y, match=match)
753 copies = _forwardcopies(x, y, match=match)
748 elif a == y:
754 elif a == y:
749 if debug:
755 if debug:
750 repo.ui.debug(b'debug.copies: search mode: backward\n')
756 repo.ui.debug(b'debug.copies: search mode: backward\n')
751 copies = _backwardrenames(x, y, match=match)
757 copies = _backwardrenames(x, y, match=match)
752 else:
758 else:
753 if debug:
759 if debug:
754 repo.ui.debug(b'debug.copies: search mode: combined\n')
760 repo.ui.debug(b'debug.copies: search mode: combined\n')
755 base = None
761 base = None
756 if a.rev() != nullrev:
762 if a.rev() != nullrev:
757 base = x
763 base = x
758 copies = _chain(
764 copies = _chain(
759 _backwardrenames(x, a, match=match),
765 _backwardrenames(x, a, match=match),
760 _forwardcopies(a, y, base, match=match),
766 _forwardcopies(a, y, base, match=match),
761 )
767 )
762 _filter(x, y, copies)
768 _filter(x, y, copies)
763 return copies
769 return copies
764
770
765
771
766 def mergecopies(repo, c1, c2, base):
772 def mergecopies(repo, c1, c2, base):
767 """
773 """
768 Finds moves and copies between context c1 and c2 that are relevant for
774 Finds moves and copies between context c1 and c2 that are relevant for
769 merging. 'base' will be used as the merge base.
775 merging. 'base' will be used as the merge base.
770
776
771 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
777 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
772 files that were moved/ copied in one merge parent and modified in another.
778 files that were moved/ copied in one merge parent and modified in another.
773 For example:
779 For example:
774
780
775 o ---> 4 another commit
781 o ---> 4 another commit
776 |
782 |
777 | o ---> 3 commit that modifies a.txt
783 | o ---> 3 commit that modifies a.txt
778 | /
784 | /
779 o / ---> 2 commit that moves a.txt to b.txt
785 o / ---> 2 commit that moves a.txt to b.txt
780 |/
786 |/
781 o ---> 1 merge base
787 o ---> 1 merge base
782
788
783 If we try to rebase revision 3 on revision 4, since there is no a.txt in
789 If we try to rebase revision 3 on revision 4, since there is no a.txt in
784 revision 4, and if user have copytrace disabled, we prints the following
790 revision 4, and if user have copytrace disabled, we prints the following
785 message:
791 message:
786
792
787 ```other changed <file> which local deleted```
793 ```other changed <file> which local deleted```
788
794
789 Returns a tuple where:
795 Returns a tuple where:
790
796
791 "branch_copies" an instance of branch_copies.
797 "branch_copies" an instance of branch_copies.
792
798
793 "diverge" is a mapping of source name -> list of destination names
799 "diverge" is a mapping of source name -> list of destination names
794 for divergent renames.
800 for divergent renames.
795
801
796 This function calls different copytracing algorithms based on config.
802 This function calls different copytracing algorithms based on config.
797 """
803 """
798 # avoid silly behavior for update from empty dir
804 # avoid silly behavior for update from empty dir
799 if not c1 or not c2 or c1 == c2:
805 if not c1 or not c2 or c1 == c2:
800 return branch_copies(), branch_copies(), {}
806 return branch_copies(), branch_copies(), {}
801
807
802 narrowmatch = c1.repo().narrowmatch()
808 narrowmatch = c1.repo().narrowmatch()
803
809
804 # avoid silly behavior for parent -> working dir
810 # avoid silly behavior for parent -> working dir
805 if c2.node() is None and c1.node() == repo.dirstate.p1():
811 if c2.node() is None and c1.node() == repo.dirstate.p1():
806 return (
812 return (
807 branch_copies(_dirstatecopies(repo, narrowmatch)),
813 branch_copies(_dirstatecopies(repo, narrowmatch)),
808 branch_copies(),
814 branch_copies(),
809 {},
815 {},
810 )
816 )
811
817
812 copytracing = repo.ui.config(b'experimental', b'copytrace')
818 copytracing = repo.ui.config(b'experimental', b'copytrace')
813 if stringutil.parsebool(copytracing) is False:
819 if stringutil.parsebool(copytracing) is False:
814 # stringutil.parsebool() returns None when it is unable to parse the
820 # stringutil.parsebool() returns None when it is unable to parse the
815 # value, so we should rely on making sure copytracing is on such cases
821 # value, so we should rely on making sure copytracing is on such cases
816 return branch_copies(), branch_copies(), {}
822 return branch_copies(), branch_copies(), {}
817
823
818 if usechangesetcentricalgo(repo):
824 if usechangesetcentricalgo(repo):
819 # The heuristics don't make sense when we need changeset-centric algos
825 # The heuristics don't make sense when we need changeset-centric algos
820 return _fullcopytracing(repo, c1, c2, base)
826 return _fullcopytracing(repo, c1, c2, base)
821
827
822 # Copy trace disabling is explicitly below the node == p1 logic above
828 # Copy trace disabling is explicitly below the node == p1 logic above
823 # because the logic above is required for a simple copy to be kept across a
829 # because the logic above is required for a simple copy to be kept across a
824 # rebase.
830 # rebase.
825 if copytracing == b'heuristics':
831 if copytracing == b'heuristics':
826 # Do full copytracing if only non-public revisions are involved as
832 # Do full copytracing if only non-public revisions are involved as
827 # that will be fast enough and will also cover the copies which could
833 # that will be fast enough and will also cover the copies which could
828 # be missed by heuristics
834 # be missed by heuristics
829 if _isfullcopytraceable(repo, c1, base):
835 if _isfullcopytraceable(repo, c1, base):
830 return _fullcopytracing(repo, c1, c2, base)
836 return _fullcopytracing(repo, c1, c2, base)
831 return _heuristicscopytracing(repo, c1, c2, base)
837 return _heuristicscopytracing(repo, c1, c2, base)
832 else:
838 else:
833 return _fullcopytracing(repo, c1, c2, base)
839 return _fullcopytracing(repo, c1, c2, base)
834
840
835
841
836 def _isfullcopytraceable(repo, c1, base):
842 def _isfullcopytraceable(repo, c1, base):
837 """Checks that if base, source and destination are all no-public branches,
843 """Checks that if base, source and destination are all no-public branches,
838 if yes let's use the full copytrace algorithm for increased capabilities
844 if yes let's use the full copytrace algorithm for increased capabilities
839 since it will be fast enough.
845 since it will be fast enough.
840
846
841 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
847 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
842 number of changesets from c1 to base such that if number of changesets are
848 number of changesets from c1 to base such that if number of changesets are
843 more than the limit, full copytracing algorithm won't be used.
849 more than the limit, full copytracing algorithm won't be used.
844 """
850 """
845 if c1.rev() is None:
851 if c1.rev() is None:
846 c1 = c1.p1()
852 c1 = c1.p1()
847 if c1.mutable() and base.mutable():
853 if c1.mutable() and base.mutable():
848 sourcecommitlimit = repo.ui.configint(
854 sourcecommitlimit = repo.ui.configint(
849 b'experimental', b'copytrace.sourcecommitlimit'
855 b'experimental', b'copytrace.sourcecommitlimit'
850 )
856 )
851 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
857 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
852 return commits < sourcecommitlimit
858 return commits < sourcecommitlimit
853 return False
859 return False
854
860
855
861
856 def _checksinglesidecopies(
862 def _checksinglesidecopies(
857 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
863 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
858 ):
864 ):
859 if src not in m2:
865 if src not in m2:
860 # deleted on side 2
866 # deleted on side 2
861 if src not in m1:
867 if src not in m1:
862 # renamed on side 1, deleted on side 2
868 # renamed on side 1, deleted on side 2
863 renamedelete[src] = dsts1
869 renamedelete[src] = dsts1
864 elif src not in mb:
870 elif src not in mb:
865 # Work around the "short-circuit to avoid issues with merge states"
871 # Work around the "short-circuit to avoid issues with merge states"
866 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
872 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
867 # destination doesn't exist in y.
873 # destination doesn't exist in y.
868 pass
874 pass
869 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
875 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
870 return
876 return
871 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
877 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
872 # modified on side 2
878 # modified on side 2
873 for dst in dsts1:
879 for dst in dsts1:
874 copy[dst] = src
880 copy[dst] = src
875
881
876
882
877 class branch_copies(object):
883 class branch_copies(object):
878 """Information about copies made on one side of a merge/graft.
884 """Information about copies made on one side of a merge/graft.
879
885
880 "copy" is a mapping from destination name -> source name,
886 "copy" is a mapping from destination name -> source name,
881 where source is in c1 and destination is in c2 or vice-versa.
887 where source is in c1 and destination is in c2 or vice-versa.
882
888
883 "movewithdir" is a mapping from source name -> destination name,
889 "movewithdir" is a mapping from source name -> destination name,
884 where the file at source present in one context but not the other
890 where the file at source present in one context but not the other
885 needs to be moved to destination by the merge process, because the
891 needs to be moved to destination by the merge process, because the
886 other context moved the directory it is in.
892 other context moved the directory it is in.
887
893
888 "renamedelete" is a mapping of source name -> list of destination
894 "renamedelete" is a mapping of source name -> list of destination
889 names for files deleted in c1 that were renamed in c2 or vice-versa.
895 names for files deleted in c1 that were renamed in c2 or vice-versa.
890
896
891 "dirmove" is a mapping of detected source dir -> destination dir renames.
897 "dirmove" is a mapping of detected source dir -> destination dir renames.
892 This is needed for handling changes to new files previously grafted into
898 This is needed for handling changes to new files previously grafted into
893 renamed directories.
899 renamed directories.
894 """
900 """
895
901
896 def __init__(
902 def __init__(
897 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
903 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
898 ):
904 ):
899 self.copy = {} if copy is None else copy
905 self.copy = {} if copy is None else copy
900 self.renamedelete = {} if renamedelete is None else renamedelete
906 self.renamedelete = {} if renamedelete is None else renamedelete
901 self.dirmove = {} if dirmove is None else dirmove
907 self.dirmove = {} if dirmove is None else dirmove
902 self.movewithdir = {} if movewithdir is None else movewithdir
908 self.movewithdir = {} if movewithdir is None else movewithdir
903
909
904 def __repr__(self):
910 def __repr__(self):
905 return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
911 return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
906 self.copy,
912 self.copy,
907 self.renamedelete,
913 self.renamedelete,
908 self.dirmove,
914 self.dirmove,
909 self.movewithdir,
915 self.movewithdir,
910 )
916 )
911
917
912
918
913 def _fullcopytracing(repo, c1, c2, base):
919 def _fullcopytracing(repo, c1, c2, base):
914 """The full copytracing algorithm which finds all the new files that were
920 """The full copytracing algorithm which finds all the new files that were
915 added from merge base up to the top commit and for each file it checks if
921 added from merge base up to the top commit and for each file it checks if
916 this file was copied from another file.
922 this file was copied from another file.
917
923
918 This is pretty slow when a lot of changesets are involved but will track all
924 This is pretty slow when a lot of changesets are involved but will track all
919 the copies.
925 the copies.
920 """
926 """
921 m1 = c1.manifest()
927 m1 = c1.manifest()
922 m2 = c2.manifest()
928 m2 = c2.manifest()
923 mb = base.manifest()
929 mb = base.manifest()
924
930
925 copies1 = pathcopies(base, c1)
931 copies1 = pathcopies(base, c1)
926 copies2 = pathcopies(base, c2)
932 copies2 = pathcopies(base, c2)
927
933
928 if not (copies1 or copies2):
934 if not (copies1 or copies2):
929 return branch_copies(), branch_copies(), {}
935 return branch_copies(), branch_copies(), {}
930
936
931 inversecopies1 = {}
937 inversecopies1 = {}
932 inversecopies2 = {}
938 inversecopies2 = {}
933 for dst, src in copies1.items():
939 for dst, src in copies1.items():
934 inversecopies1.setdefault(src, []).append(dst)
940 inversecopies1.setdefault(src, []).append(dst)
935 for dst, src in copies2.items():
941 for dst, src in copies2.items():
936 inversecopies2.setdefault(src, []).append(dst)
942 inversecopies2.setdefault(src, []).append(dst)
937
943
938 copy1 = {}
944 copy1 = {}
939 copy2 = {}
945 copy2 = {}
940 diverge = {}
946 diverge = {}
941 renamedelete1 = {}
947 renamedelete1 = {}
942 renamedelete2 = {}
948 renamedelete2 = {}
943 allsources = set(inversecopies1) | set(inversecopies2)
949 allsources = set(inversecopies1) | set(inversecopies2)
944 for src in allsources:
950 for src in allsources:
945 dsts1 = inversecopies1.get(src)
951 dsts1 = inversecopies1.get(src)
946 dsts2 = inversecopies2.get(src)
952 dsts2 = inversecopies2.get(src)
947 if dsts1 and dsts2:
953 if dsts1 and dsts2:
948 # copied/renamed on both sides
954 # copied/renamed on both sides
949 if src not in m1 and src not in m2:
955 if src not in m1 and src not in m2:
950 # renamed on both sides
956 # renamed on both sides
951 dsts1 = set(dsts1)
957 dsts1 = set(dsts1)
952 dsts2 = set(dsts2)
958 dsts2 = set(dsts2)
953 # If there's some overlap in the rename destinations, we
959 # If there's some overlap in the rename destinations, we
954 # consider it not divergent. For example, if side 1 copies 'a'
960 # consider it not divergent. For example, if side 1 copies 'a'
955 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
961 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
956 # and 'd' and deletes 'a'.
962 # and 'd' and deletes 'a'.
957 if dsts1 & dsts2:
963 if dsts1 & dsts2:
958 for dst in dsts1 & dsts2:
964 for dst in dsts1 & dsts2:
959 copy1[dst] = src
965 copy1[dst] = src
960 copy2[dst] = src
966 copy2[dst] = src
961 else:
967 else:
962 diverge[src] = sorted(dsts1 | dsts2)
968 diverge[src] = sorted(dsts1 | dsts2)
963 elif src in m1 and src in m2:
969 elif src in m1 and src in m2:
964 # copied on both sides
970 # copied on both sides
965 dsts1 = set(dsts1)
971 dsts1 = set(dsts1)
966 dsts2 = set(dsts2)
972 dsts2 = set(dsts2)
967 for dst in dsts1 & dsts2:
973 for dst in dsts1 & dsts2:
968 copy1[dst] = src
974 copy1[dst] = src
969 copy2[dst] = src
975 copy2[dst] = src
970 # TODO: Handle cases where it was renamed on one side and copied
976 # TODO: Handle cases where it was renamed on one side and copied
971 # on the other side
977 # on the other side
972 elif dsts1:
978 elif dsts1:
973 # copied/renamed only on side 1
979 # copied/renamed only on side 1
974 _checksinglesidecopies(
980 _checksinglesidecopies(
975 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
981 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
976 )
982 )
977 elif dsts2:
983 elif dsts2:
978 # copied/renamed only on side 2
984 # copied/renamed only on side 2
979 _checksinglesidecopies(
985 _checksinglesidecopies(
980 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
986 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
981 )
987 )
982
988
983 # find interesting file sets from manifests
989 # find interesting file sets from manifests
984 cache = []
990 cache = []
985
991
986 def _get_addedfiles(idx):
992 def _get_addedfiles(idx):
987 if not cache:
993 if not cache:
988 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
994 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
989 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
995 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
990 u1 = sorted(addedinm1 - addedinm2)
996 u1 = sorted(addedinm1 - addedinm2)
991 u2 = sorted(addedinm2 - addedinm1)
997 u2 = sorted(addedinm2 - addedinm1)
992 cache.extend((u1, u2))
998 cache.extend((u1, u2))
993 return cache[idx]
999 return cache[idx]
994
1000
995 u1fn = lambda: _get_addedfiles(0)
1001 u1fn = lambda: _get_addedfiles(0)
996 u2fn = lambda: _get_addedfiles(1)
1002 u2fn = lambda: _get_addedfiles(1)
997 if repo.ui.debugflag:
1003 if repo.ui.debugflag:
998 u1 = u1fn()
1004 u1 = u1fn()
999 u2 = u2fn()
1005 u2 = u2fn()
1000
1006
1001 header = b" unmatched files in %s"
1007 header = b" unmatched files in %s"
1002 if u1:
1008 if u1:
1003 repo.ui.debug(
1009 repo.ui.debug(
1004 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
1010 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
1005 )
1011 )
1006 if u2:
1012 if u2:
1007 repo.ui.debug(
1013 repo.ui.debug(
1008 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
1014 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
1009 )
1015 )
1010
1016
1011 renamedeleteset = set()
1017 renamedeleteset = set()
1012 divergeset = set()
1018 divergeset = set()
1013 for dsts in diverge.values():
1019 for dsts in diverge.values():
1014 divergeset.update(dsts)
1020 divergeset.update(dsts)
1015 for dsts in renamedelete1.values():
1021 for dsts in renamedelete1.values():
1016 renamedeleteset.update(dsts)
1022 renamedeleteset.update(dsts)
1017 for dsts in renamedelete2.values():
1023 for dsts in renamedelete2.values():
1018 renamedeleteset.update(dsts)
1024 renamedeleteset.update(dsts)
1019
1025
1020 repo.ui.debug(
1026 repo.ui.debug(
1021 b" all copies found (* = to merge, ! = divergent, "
1027 b" all copies found (* = to merge, ! = divergent, "
1022 b"% = renamed and deleted):\n"
1028 b"% = renamed and deleted):\n"
1023 )
1029 )
1024 for side, copies in ((b"local", copies1), (b"remote", copies2)):
1030 for side, copies in ((b"local", copies1), (b"remote", copies2)):
1025 if not copies:
1031 if not copies:
1026 continue
1032 continue
1027 repo.ui.debug(b" on %s side:\n" % side)
1033 repo.ui.debug(b" on %s side:\n" % side)
1028 for f in sorted(copies):
1034 for f in sorted(copies):
1029 note = b""
1035 note = b""
1030 if f in copy1 or f in copy2:
1036 if f in copy1 or f in copy2:
1031 note += b"*"
1037 note += b"*"
1032 if f in divergeset:
1038 if f in divergeset:
1033 note += b"!"
1039 note += b"!"
1034 if f in renamedeleteset:
1040 if f in renamedeleteset:
1035 note += b"%"
1041 note += b"%"
1036 repo.ui.debug(
1042 repo.ui.debug(
1037 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
1043 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
1038 )
1044 )
1039 del renamedeleteset
1045 del renamedeleteset
1040 del divergeset
1046 del divergeset
1041
1047
1042 repo.ui.debug(b" checking for directory renames\n")
1048 repo.ui.debug(b" checking for directory renames\n")
1043
1049
1044 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
1050 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
1045 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
1051 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
1046
1052
1047 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
1053 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
1048 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
1054 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
1049
1055
1050 return branch_copies1, branch_copies2, diverge
1056 return branch_copies1, branch_copies2, diverge
1051
1057
1052
1058
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Detect wholesale directory renames and files that should follow them.

    repo: the repository (used for debug output)
    ctx: the context for one of the merge sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
        merge.manifestmerge() won't care about
    addedfilesfn: zero-argument callable returning the files added on the
        other side (compared to ctx)

    Returns a pair (dirmove, movewithdir): dirmove maps moved source
    directories (trailing-slash form) to their destinations, and
    movewithdir maps newly added files to where they should move because
    their directory moved.
    """
    # A directory counts as moved when every file in it was copied away and
    # all the copies out of it agree on a single destination directory.
    uninteresting = set()
    dirmove = {}
    for dst, src in pycompat.iteritems(fullcopy):
        srcdir = pathutil.dirname(src)
        dstdir = pathutil.dirname(dst)
        if srcdir in uninteresting:
            # already ruled out
            continue
        if ctx.hasdir(srcdir) and ctx.hasdir(dstdir):
            # the source directory still exists, so it wasn't entirely moved
            uninteresting.add(srcdir)
        elif dirmove.get(srcdir, dstdir) != dstdir:
            # copies out of the same directory disagree on the destination
            uninteresting.add(srcdir)
        else:
            # looks like a directory rename so far
            dirmove[srcdir] = dstdir

    for d in uninteresting:
        dirmove.pop(d, None)
    del uninteresting

    if not dirmove:
        return {}, {}

    # normalize to trailing-slash form so the prefix matching below cannot
    # accidentally match a sibling directory with a common name prefix
    dirmove = {
        srcdir + b"/": dstdir + b"/"
        for srcdir, dstdir in pycompat.iteritems(dirmove)
    }

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    # Visit longer (child) directory names before their parents: if dir1/
    # was renamed to dir2/ and dir1/subdir1/ to dir2/subdir2/, a file added
    # in dir1/subdir1/ must map to dir2/subdir2/file, not dir2/subdir1/file.
    dirmove_children_first = sorted(dirmove, reverse=True)

    # check unaccounted nonoverlapping files against directory moves
    movewithdir = {}
    for f in addedfilesfn():
        if f in fullcopy:
            continue
        for d in dirmove_children_first:
            if not f.startswith(d):
                continue
            # new file added in a directory that was moved, move it along
            df = dirmove[d] + f[len(d) :]
            if df not in copy:
                movewithdir[f] = df
                repo.ui.debug(
                    b"   pending file src: '%s' -> dst: '%s'\n" % (f, df)
                )
            break

    return dirmove, movewithdir
1122
1128
1123
1129
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics.

    Assumes that moves or renames are of the following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch",
    i.e. the commits from base up to c2, not including base. If a merge is
    involved it falls back to _fullcopytracing().

    Can be enabled with the following config:

        [experimental]
        copytrace = heuristics

    The copy/move candidates found by the heuristics can sometimes be very
    numerous, which makes the algorithm slow. The number of candidates to
    check is capped by the config
    `experimental.copytrace.movecandidateslimit`, which defaults to 100.
    """
    # working-copy contexts have no revision number; use their first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    m1 = c1.manifest()

    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base and c2, bailing out to the
    # full algorithm if the walk runs into a merge commit
    changedfiles = set()
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # copies made on the c2 side whose source still exists on the c1 side
    copies2 = {
        dst: src
        for dst, src in pycompat.iteritems(_forwardcopies(base, c2))
        if src in m1
    }

    # A file is "missing" if it isn't present in the destination, but is
    # present in the base and present in the source. Presence in the base
    # excludes added files; presence in the source excludes removed files.
    missingfiles = [
        f for f in changedfiles if f not in m1 and f in base and f in c2
    ]

    copies1 = {}
    if missingfiles:
        # index the files added on the c1 side by basename and by dirname —
        # the two axes the heuristics match rename candidates on
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)
        for f in m1.filesnotin(base.manifest()):
            basenametofilename[os.path.basename(f)].append(f)
            dirnametofilename[os.path.dirname(f)].append(f)

        for f in missingfiles:
            candidates = (
                basenametofilename[os.path.basename(f)]
                + dirnametofilename[os.path.dirname(f)]
            )
            # f is guaranteed to be present in c2, so c2.filectx(f) is safe
            f2 = c2.filectx(f)
            # too many candidates slows the heuristics down; the config
            # value below caps how many we are willing to check per file
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(candidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(candidates))
                )
                continue

            for candidate in candidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1231
1237
1232
1238
1233 def _related(f1, f2):
1239 def _related(f1, f2):
1234 """return True if f1 and f2 filectx have a common ancestor
1240 """return True if f1 and f2 filectx have a common ancestor
1235
1241
1236 Walk back to common ancestor to see if the two files originate
1242 Walk back to common ancestor to see if the two files originate
1237 from the same file. Since workingfilectx's rev() is None it messes
1243 from the same file. Since workingfilectx's rev() is None it messes
1238 up the integer comparison logic, hence the pre-step check for
1244 up the integer comparison logic, hence the pre-step check for
1239 None (f1 and f2 can only be workingfilectx's initially).
1245 None (f1 and f2 can only be workingfilectx's initially).
1240 """
1246 """
1241
1247
1242 if f1 == f2:
1248 if f1 == f2:
1243 return True # a match
1249 return True # a match
1244
1250
1245 g1, g2 = f1.ancestors(), f2.ancestors()
1251 g1, g2 = f1.ancestors(), f2.ancestors()
1246 try:
1252 try:
1247 f1r, f2r = f1.linkrev(), f2.linkrev()
1253 f1r, f2r = f1.linkrev(), f2.linkrev()
1248
1254
1249 if f1r is None:
1255 if f1r is None:
1250 f1 = next(g1)
1256 f1 = next(g1)
1251 if f2r is None:
1257 if f2r is None:
1252 f2 = next(g2)
1258 f2 = next(g2)
1253
1259
1254 while True:
1260 while True:
1255 f1r, f2r = f1.linkrev(), f2.linkrev()
1261 f1r, f2r = f1.linkrev(), f2.linkrev()
1256 if f1r > f2r:
1262 if f1r > f2r:
1257 f1 = next(g1)
1263 f1 = next(g1)
1258 elif f2r > f1r:
1264 elif f2r > f1r:
1259 f2 = next(g2)
1265 f2 = next(g2)
1260 else: # f1 and f2 point to files in the same linkrev
1266 else: # f1 and f2 point to files in the same linkrev
1261 return f1 == f2 # true if they point to the same file
1267 return f1 == f2 # true if they point to the same file
1262 except StopIteration:
1268 except StopIteration:
1263 return False
1269 return False
1264
1270
1265
1271
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this only considers copies between base and ctx;
    copies between base and wctx are ignored. Also unlike mergecopies(),
    the copies are applied directly to the working copy instead of being
    returned as information about them. That makes it cheaper (especially
    in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but only when it
    thinks the source files are related (see merge._related()), and never
    when the file wasn't modified on the local side. This function adds
    the copies that were "missed" by merge.update().
    """
    new_copies = pathcopies(base, ctx)
    parent = wctx.p1()
    _filter(parent, wctx, new_copies)
    # Drop copy information for destinations that already existed before
    # the graft. This handles grafting a rename onto a commit that already
    # has the rename; keeping the copy metadata would otherwise create an
    # empty commit where we would prefer not to create one.
    for dst in list(new_copies):
        if dst in parent:
            del new_copies[dst]
    for dst, src in pycompat.iteritems(new_copies):
        wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now