##// END OF EJS Templates
copies: fix an incorrect comment in graftcopies() from recent D9802...
Martin von Zweigbergk -
r47139:892eb7c5 default
parent child Browse files
Show More
@@ -1,1238 +1,1241 b''
1 # coding: utf8
1 # coding: utf8
2 # copies.py - copy detection for Mercurial
2 # copies.py - copy detection for Mercurial
3 #
3 #
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import collections
11 import collections
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15 from .node import (
15 from .node import (
16 nullid,
16 nullid,
17 nullrev,
17 nullrev,
18 )
18 )
19
19
20 from . import (
20 from . import (
21 match as matchmod,
21 match as matchmod,
22 pathutil,
22 pathutil,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28
28
29 from .utils import stringutil
29 from .utils import stringutil
30
30
31 from .revlogutils import (
31 from .revlogutils import (
32 flagutil,
32 flagutil,
33 sidedata as sidedatamod,
33 sidedata as sidedatamod,
34 )
34 )
35
35
# Optional Rust fast path for copy tracing; this is None when the Rust
# extension is unavailable, in which case the pure-Python code paths below
# (guarded by `if rustmod is not None`) are used instead.
rustmod = policy.importrust("copy_tracing")
37
37
38
38
39 def _filter(src, dst, t):
39 def _filter(src, dst, t):
40 """filters out invalid copies after chaining"""
40 """filters out invalid copies after chaining"""
41
41
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
44 # in the following table (not including trivial cases). For example, case 6
44 # in the following table (not including trivial cases). For example, case 6
45 # is where a file existed in 'src' and remained under that name in 'mid' and
45 # is where a file existed in 'src' and remained under that name in 'mid' and
46 # then was renamed between 'mid' and 'dst'.
46 # then was renamed between 'mid' and 'dst'.
47 #
47 #
48 # case src mid dst result
48 # case src mid dst result
49 # 1 x y - -
49 # 1 x y - -
50 # 2 x y y x->y
50 # 2 x y y x->y
51 # 3 x y x -
51 # 3 x y x -
52 # 4 x y z x->z
52 # 4 x y z x->z
53 # 5 - x y -
53 # 5 - x y -
54 # 6 x x y x->y
54 # 6 x x y x->y
55 #
55 #
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
58 # between 5 and 6, so it includes all cases in its result.
58 # between 5 and 6, so it includes all cases in its result.
59 # Cases 1, 3, and 5 are then removed by _filter().
59 # Cases 1, 3, and 5 are then removed by _filter().
60
60
61 for k, v in list(t.items()):
61 for k, v in list(t.items()):
62 if k == v: # case 3
62 if k == v: # case 3
63 del t[k]
63 del t[k]
64 elif v not in src: # case 5
64 elif v not in src: # case 5
65 # remove copies from files that didn't exist
65 # remove copies from files that didn't exist
66 del t[k]
66 del t[k]
67 elif k not in dst: # case 1
67 elif k not in dst: # case 1
68 # remove copies to files that were then removed
68 # remove copies to files that were then removed
69 del t[k]
69 del t[k]
70
70
71
71
def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    # start from the older copies and overlay the newer ones; when a newer
    # copy's source is itself a copy destination in 'prefix', follow it back
    # to the original source
    chained = prefix.copy()
    chained.update(
        (dest, prefix.get(src, src))
        for dest, src in pycompat.iteritems(suffix)
    )
    return chained
78
78
79
79
80 def _tracefile(fctx, am, basemf):
80 def _tracefile(fctx, am, basemf):
81 """return file context that is the ancestor of fctx present in ancestor
81 """return file context that is the ancestor of fctx present in ancestor
82 manifest am
82 manifest am
83
83
84 Note: we used to try and stop after a given limit, however checking if that
84 Note: we used to try and stop after a given limit, however checking if that
85 limit is reached turned out to be very expensive. we are better off
85 limit is reached turned out to be very expensive. we are better off
86 disabling that feature."""
86 disabling that feature."""
87
87
88 for f in fctx.ancestors():
88 for f in fctx.ancestors():
89 path = f.path()
89 path = f.path()
90 if am.get(path, None) == f.filenode():
90 if am.get(path, None) == f.filenode():
91 return path
91 return path
92 if basemf and basemf.get(path, None) == f.filenode():
92 if basemf and basemf.get(path, None) == f.filenode():
93 return path
93 return path
94
94
95
95
96 def _dirstatecopies(repo, match=None):
96 def _dirstatecopies(repo, match=None):
97 ds = repo.dirstate
97 ds = repo.dirstate
98 c = ds.copies().copy()
98 c = ds.copies().copy()
99 for k in list(c):
99 for k in list(c):
100 if ds[k] not in b'anm' or (match and not match(k)):
100 if ds[k] not in b'anm' or (match and not match(k)):
101 del c[k]
101 del c[k]
102 return c
102 return c
103
103
104
104
105 def _computeforwardmissing(a, b, match=None):
105 def _computeforwardmissing(a, b, match=None):
106 """Computes which files are in b but not a.
106 """Computes which files are in b but not a.
107 This is its own function so extensions can easily wrap this call to see what
107 This is its own function so extensions can easily wrap this call to see what
108 files _forwardcopies is about to process.
108 files _forwardcopies is about to process.
109 """
109 """
110 ma = a.manifest()
110 ma = a.manifest()
111 mb = b.manifest()
111 mb = b.manifest()
112 return mb.filesnotin(ma, match=match)
112 return mb.filesnotin(ma, match=match)
113
113
114
114
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage always implies the changeset-centric algorithm
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    # otherwise honor the experimental read-from configuration
    configured = repo.ui.config(b'experimental', b'copies.read-from')
    return configured in (b'changeset-only', b'compatibility')
122
122
123
123
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {destination: source} mapping of copies from ``a`` to ``b``.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    # verbose copy tracing requires both --debug and the devel config knob
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    if repo.ui.configbool(b'devel', b'copy-tracing.trace-all-files'):
        # devel override: consider every file in `b`, not only the ones
        # missing from `a`
        missing = list(b.walk(match))
        # _computeforwardmissing(a, b, match=forwardmissingmatch)
        if debug:
            dbg(b'debug.copies: searching all files: %d\n' % len(missing))
    else:
        missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
        if debug:
            dbg(
                b'debug.copies: missing files to search: %d\n'
                % len(missing)
            )

    # compute the ancestry context once and share it across all the file
    # contexts traced below
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
189
189
190
190
def _revinfo_getter(repo, match):
    """returns a function that returns the following data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object, or None when the revision carries no
      copy information; with the Rust fast path, the raw sidedata blob is
      returned in place of a ChangingFiles object
    """
    # NOTE(review): `match` is currently unused in this function
    cl = repo.changelog
    # bind the changelog accessors to locals once so the returned closure
    # avoids repeated attribute lookups
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    if rustmod is not None:

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                # the Rust implementation consumes the raw sidedata entry
                # directly, so skip building a ChangingFiles object
                raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
            else:
                raw = None
            return (p1, p2, raw)

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                changes = changelogrevision(rev).changes
            else:
                changes = None
            return (p1, p2, changes)

    return revinfo
227
227
228
228
def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor"""
    memo = {}

    def _is_ancestor(anc, desc):
        # cheap answers first, no caching needed: a higher revision number
        # can never be an ancestor of a lower one, and every revision is an
        # ancestor of itself
        if anc > desc:
            return False
        if anc == desc:
            return True
        key = (anc, desc)
        if key not in memo:
            memo[key] = is_ancestor(anc, desc)
        return memo[key]

    return _is_ancestor
245
245
246
246
def _changesetforwardcopies(a, b, match):
    """changeset-centric copy tracing from ``a`` to ``b``

    Returns a {destination: source} mapping of copies, computed from the
    copy information recorded in the changesets themselves.
    """
    if a.rev() in (nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    # NOTE(review): this initialization looks unused — `children` is
    # unconditionally rebuilt in the non-sidedata branch below
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev

    # To track rename from "A" to B, we need to gather all parent → children
    # edges that are contained in `::B` but not in `::A`.
    #
    #
    # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
    # ::a`) and also all the "roots point", ie the parents of the exclusive set
    # that belong to ::a. These are exactly all the revisions needed to express
    # the parent → children we need to combine.
    #
    # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
    # excluding paths that lead to roots that are not ancestors of `a`. We
    # keep this out of the explanation because it is hard enough without
    # this special case..

    parents = cl._uncheckedparentrevs
    graph_roots = (nullrev, nullrev)

    ancestors = cl.ancestors([a.rev()], inclusive=True)
    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    roots = set()
    has_graph_roots = False

    # iterate over `only(B, A)`
    for r in revs:
        ps = parents(r)
        if ps == graph_roots:
            has_graph_roots = True
        else:
            p1, p2 = ps

            # find all the "root points" (see larger comment above)
            if p1 != nullrev and p1 in ancestors:
                roots.add(p1)
            if p2 != nullrev and p2 in ancestors:
                roots.add(p2)
    if not roots:
        # no common revision to track copies from
        return {}
    if has_graph_roots:
        # this deals with the special case mentioned in the [1] footnote. We
        # must filter out revisions that lead to non-common graphroots.
        roots = list(roots)
        m = min(roots)
        h = [b.rev()]
        roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
        roots_to_head = set(roots_to_head)
        revs = [r for r in revs if r in roots_to_head]

    if repo.filecopiesmode == b'changeset-sidedata':
        # When using side-data, we will process the edges "from" the children.
        # We iterate over the children, gathering previously collected data
        # for the parents. To know when the parents data is no longer
        # necessary, we keep a counter of how many children each revision has.
        #
        # An interesting property of `children_count` is that it only contains
        # revisions that will be relevant for an edge of the graph. So if a
        # child has a parent not in `children_count`, that edge should not be
        # processed.
        children_count = dict((r, 0) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children_count[r] = 0
                if p in children_count:
                    children_count[p] += 1
        revinfo = _revinfo_getter(repo, match)
        return _combine_changeset_copies(
            revs, children_count, b.rev(), revinfo, match, isancestor
        )
    else:
        # When not using side-data, we will process the edges "from" the parent.
        # so we need a full mapping of the parent -> children relation.
        children = dict((r, []) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children[r] = []
                if p in children:
                    children[p].append(r)
        # swap the head (b itself, asserted below) for the root revisions and
        # restore sorted order
        x = revs.pop()
        assert x == b.rev()
        revs.extend(roots)
        revs.sort()

        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )
346
346
347
347
def _combine_changeset_copies(
    revs, children_count, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children_count: a {parent: <number-of-relevant-children>} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that return (p1, p2, changes) for a revision
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    if rustmod is not None:
        # delegate the whole graph walk to the Rust implementation
        final_copies = rustmod.combine_changeset_copies(
            list(revs), children_count, targetrev, revinfo, isancestor
        )
    else:
        isancestor = cached_is_ancestor(isancestor)

        # {rev: {dest: (rev-where-recorded, source-or-None)}} for revisions
        # whose data is still needed by at least one unprocessed child
        all_copies = {}
        # iterate over all the "children" side of copy tracing "edge"
        for current_rev in revs:
            p1, p2, changes = revinfo(current_rev)
            current_copies = None
            # iterate over all parents to chain the existing data with the
            # data from the parent → child edge.
            for parent, parent_rev in ((1, p1), (2, p2)):
                if parent_rev == nullrev:
                    continue
                remaining_children = children_count.get(parent_rev)
                if remaining_children is None:
                    # parent not relevant to the copy tracing (see
                    # `children_count` construction in the caller)
                    continue
                remaining_children -= 1
                children_count[parent_rev] = remaining_children
                if remaining_children:
                    copies = all_copies.get(parent_rev, None)
                else:
                    # last child of this parent: release the parent's data
                    copies = all_copies.pop(parent_rev, None)

                if copies is None:
                    # this is a root
                    newcopies = copies = {}
                elif remaining_children:
                    # other children still need `copies`; work on a copy
                    newcopies = copies.copy()
                else:
                    newcopies = copies
                # chain the data in the edge with the existing data
                if changes is not None:
                    childcopies = {}
                    if parent == 1:
                        childcopies = changes.copied_from_p1
                    elif parent == 2:
                        childcopies = changes.copied_from_p2

                    if childcopies:
                        newcopies = copies.copy()
                        for dest, source in pycompat.iteritems(childcopies):
                            prev = copies.get(source)
                            if prev is not None and prev[1] is not None:
                                # transitive copy: point back at the older
                                # source
                                source = prev[1]
                            newcopies[dest] = (current_rev, source)
                        assert newcopies is not copies
                    if changes.removed:
                        for f in changes.removed:
                            if f in newcopies:
                                if newcopies is copies:
                                    # copy on write to avoid affecting potential other
                                    # branches. when there are no other branches, this
                                    # could be avoided.
                                    newcopies = copies.copy()
                                # a None source records the removal
                                newcopies[f] = (current_rev, None)
                # check potential need to combine the data from another parent (for
                # that child). See comment below for details.
                if current_copies is None:
                    current_copies = newcopies
                else:
                    # we are the second parent to work on c, we need to merge our
                    # work with the other.
                    #
                    # In case of conflict, parent 1 take precedence over parent 2.
                    # This is an arbitrary choice made anew when implementing
                    # changeset based copies. It was made without regards with
                    # potential filelog related behavior.
                    assert parent == 2
                    current_copies = _merge_copies_dict(
                        newcopies, current_copies, isancestor, changes
                    )
            all_copies[current_rev] = current_copies

        # filter out internal details and return a {dest: source mapping}
        final_copies = {}
        for dest, (tt, source) in all_copies[targetrev].items():
            if source is not None:
                final_copies[dest] = source
    if not alwaysmatch:
        for filename in list(final_copies.keys()):
            if not match(filename):
                del final_copies[filename]
    return final_copies
451
451
452
452
# constants returned by _compare_values() to tell _merge_copies_dict() which
# of the two candidate values to keep
PICK_MINOR = 0
PICK_MAJOR = 1
PICK_EITHER = 2  # the two values are equivalent; either side may be kept
457
457
458
458
def _merge_copies_dict(minor, major, isancestor, changes):
    """merge two copies-mapping together, minor and major

    In case of conflict, value from "major" will be picked.

    - `isancestor(low_rev, high_rev)`: callable returning True if `low_rev`
      is an ancestor of `high_rev`,

    - `changes`: the ChangingFiles information for the current revision (may
      be None); forwarded to _compare_values() to arbitrate conflicts.

    return the resulting dict (in practice, the "minor" object, updated)
    """
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            # no conflict: simply adopt the value from "major"
            minor[dest] = value
        else:
            pick = _compare_values(changes, isancestor, dest, other, value)
            if pick == PICK_MAJOR:
                minor[dest] = value
    return minor
481
481
482
482
def _compare_values(changes, isancestor, dest, minor, major):
    """compare two value within a _merge_copies_dict loop iteration

    Each value is a `(rev, source)` pair, where `source` is None when the
    file was removed in `rev` (see _combine_changeset_copies). Returns one
    of PICK_MINOR, PICK_MAJOR or PICK_EITHER.
    """
    major_tt, major_value = major
    minor_tt, minor_value = minor

    # evacuate some simple case first:
    if major_tt == minor_tt:
        # if it comes from the same revision it must be the same value
        assert major_value == minor_value
        return PICK_EITHER
    elif major[1] == minor[1]:
        # same source on both sides (or both record a removal): equivalent
        return PICK_EITHER

    # actual merging needed: content from "major" wins, unless it is older than
    # the branch point or there is a merge
    elif changes is not None and major[1] is None and dest in changes.salvaged:
        # "major" recorded a removal, but the file was salvaged in this
        # revision: keep the surviving copy information from "minor"
        return PICK_MINOR
    elif changes is not None and minor[1] is None and dest in changes.salvaged:
        # symmetric case: the removal recorded by "minor" was salvaged
        return PICK_MAJOR
    elif changes is not None and dest in changes.merged:
        return PICK_MAJOR
    elif not isancestor(major_tt, minor_tt):
        if major[1] is not None:
            return PICK_MAJOR
    elif isancestor(minor_tt, major_tt):
        return PICK_MAJOR
    return PICK_MINOR
510
510
511
511
def _revinfo_getter_extra(repo):
    """return a function that returns multiple data given a <rev>:

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        # Bind the changectx once so the returned callback can be queried
        # for several paths of the same revision cheaply.
        ctx = repo[rev]

        def ismerged(path):
            # A path untouched by the revision cannot have been merged in it.
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            parents = fctx._filelog.parents(fctx._filenode)
            # "merged" means the filelog entry has two non-null parents.
            nb_parents = 0
            for n in parents:
                if n != nullid:
                    nb_parents += 1
            return nb_parents >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        # ctx._copies yields the (p1copies, p2copies) pair recorded for
        # this changeset (here stored in the commit "extra" fields).
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo
549
549
550
550
def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information

    Walks `revs` (which must be ordered parents-before-children: a rev
    with no accumulated state is treated as a root) and propagates a
    {dest: (rev, source)} dict down to each child, merging when a merge
    commit is reached from its second parent.  Returns the plain
    {dest: source} mapping accumulated at `targetrev`.
    """
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            # pick the copy information recorded against the parent we are
            # coming from
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    # chain renames: if the source was itself copied from an
                    # older file, record the oldest source
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    # a (rev, None) entry is a deletion marker; it is dropped
                    # when building `final_copies` below
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                all_copies[c] = newcopies

    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
619
619
620
620
621 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
621 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
622 """version of `_merge_copies_dict` that works with the Google
622 """version of `_merge_copies_dict` that works with the Google
623 specific "extra" based storage for copy information"""
623 specific "extra" based storage for copy information"""
624 for dest, value in major.items():
624 for dest, value in major.items():
625 other = minor.get(dest)
625 other = minor.get(dest)
626 if other is None:
626 if other is None:
627 minor[dest] = value
627 minor[dest] = value
628 else:
628 else:
629 new_tt = value[0]
629 new_tt = value[0]
630 other_tt = other[0]
630 other_tt = other[0]
631 if value[1] == other[1]:
631 if value[1] == other[1]:
632 continue
632 continue
633 # content from "major" wins, unless it is older
633 # content from "major" wins, unless it is older
634 # than the branch point or there is a merge
634 # than the branch point or there is a merge
635 if (
635 if (
636 new_tt == other_tt
636 new_tt == other_tt
637 or not isancestor(new_tt, other_tt)
637 or not isancestor(new_tt, other_tt)
638 or ismerged(dest)
638 or ismerged(dest)
639 ):
639 ):
640 minor[dest] = value
640 minor[dest] = value
641
641
642
642
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is None:
        # `b` is the working copy: track copies up to its first parent,
        # then layer the uncommitted dirstate copies on top.
        committed = _committedforwardcopies(a, b.p1(), base, match)
        return _chain(committed, _dirstatecopies(b._repo, match))
    return _committedforwardcopies(a, b, base, match)
657
657
658
658
def _backwardrenames(a, b, match):
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though copies are dropped below, 1:n rename situations can still
    # exist (e.g. hg cp a b; hg mv a c); sorting before inverting makes the
    # surviving pick deterministic.  The forward search is run unfiltered
    # because "match" must apply to the *source* side of the reversed
    # mapping, which is the destination side of the forward one.
    forward = _forwardcopies(b, a)
    reversed_renames = {}
    for dst, src in sorted(pycompat.iteritems(forward)):
        if match and not match(src):
            continue
        if src in a:
            # source still exists in `a`: this is a copy, not a rename
            continue
        reversed_renames[src] = dst
    return reversed_renames
679
679
680
680
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    Dispatches on the relationship between ``x`` and ``y``: forward
    tracing when ``x`` is an ancestor of ``y``, backward when ``y`` is an
    ancestor of ``x``, and a backward+forward combination through their
    common ancestor otherwise.
    """
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    # trivial cases: identical contexts or an empty endpoint
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        # NOTE(review): when a real common ancestor exists, `x` is passed as
        # the base for the forward leg — presumably so copy sources present
        # in `x` but not in `a` are still chained; confirm against
        # _committedforwardcopies before changing.
        if a.rev() != nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    # drop copies invalidated by the chaining (see _filter)
    _filter(x, y, copies)
    return copies
717
717
718
718
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        # default: exhaustive (and slower) full copytracing
        return _fullcopytracing(repo, c1, c2, base)
787
787
788
788
def _isfullcopytraceable(repo, c1, base):
    """Decide whether the full copytrace algorithm is affordable here.

    Returns True when base and source are both non-public (mutable) and
    the number of changesets between them stays under the
    `experimental.copytrace.sourcecommitlimit` config knob, in which case
    full copytracing is fast enough and more capable than heuristics.
    """
    if c1.rev() is None:
        # working directory: reason about its first parent instead
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        return False
    sourcecommitlimit = repo.ui.configint(
        b'experimental', b'copytrace.sourcecommitlimit'
    )
    nb_commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
    return nb_commits < sourcecommitlimit
807
807
808
808
809 def _checksinglesidecopies(
809 def _checksinglesidecopies(
810 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
810 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
811 ):
811 ):
812 if src not in m2:
812 if src not in m2:
813 # deleted on side 2
813 # deleted on side 2
814 if src not in m1:
814 if src not in m1:
815 # renamed on side 1, deleted on side 2
815 # renamed on side 1, deleted on side 2
816 renamedelete[src] = dsts1
816 renamedelete[src] = dsts1
817 elif src not in mb:
817 elif src not in mb:
818 # Work around the "short-circuit to avoid issues with merge states"
818 # Work around the "short-circuit to avoid issues with merge states"
819 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
819 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
820 # destination doesn't exist in y.
820 # destination doesn't exist in y.
821 pass
821 pass
822 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
822 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
823 return
823 return
824 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
824 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
825 # modified on side 2
825 # modified on side 2
826 for dst in dsts1:
826 for dst in dsts1:
827 copy[dst] = src
827 copy[dst] = src
828
828
829
829
class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # Substitute fresh dicts only for missing arguments so a caller's
        # dict (even an empty one) stays shared with this instance.
        if copy is None:
            copy = {}
        if renamedelete is None:
            renamedelete = {}
        if dirmove is None:
            dirmove = {}
        if movewithdir is None:
            movewithdir = {}
        self.copy = copy
        self.renamedelete = renamedelete
        self.dirmove = dirmove
        self.movewithdir = movewithdir

    def __repr__(self):
        fmt = (
            '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n'
            ' movewithdir=%r\n>'
        )
        return fmt % (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
864
864
865
865
def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a (branch_copies, branch_copies, diverge) tuple: per-side copy
    information plus the divergent-rename mapping.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return branch_copies(), branch_copies(), {}

    # invert the {dst: src} maps so each source lists all its destinations
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    # classify every copy source: both-sides, side-1 only, or side-2 only
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    # computed lazily (and at most once, via the shared `cache` list) since
    # they are only needed for debug output and directory-rename detection
    cache = []

    def _get_addedfiles(idx):
        if not cache:
            addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
            addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
            u1 = sorted(addedinm1 - addedinm2)
            u2 = sorted(addedinm2 - addedinm1)
            cache.extend((u1, u2))
        return cache[idx]

    u1fn = lambda: _get_addedfiles(0)
    u2fn = lambda: _get_addedfiles(1)
    if repo.ui.debugflag:
        u1 = u1fn()
        u2 = u2fn()

        header = b"  unmatched files in %s"
        if u1:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1))
            )
        if u2:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2))
            )

        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b"   on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b"    src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    # directory moves on one side pull along files added on the other side
    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge
1004
1004
1005
1005
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfilesfn: function returning added files on the other side (compared to
    ctx)
    """
    # Build a source-directory -> destination-directory map from the file
    # copies. A source directory goes into `ruledout` as soon as we learn it
    # cannot represent a whole-directory move.
    ruledout = set()
    dirmove = {}

    # A directory move is suspected when every copied file of a directory
    # landed in one common new directory.
    for dst, src in pycompat.iteritems(fullcopy):
        srcdir = pathutil.dirname(src)
        dstdir = pathutil.dirname(dst)
        if srcdir in ruledout:
            # already seen to be uninteresting
            continue
        if ctx.hasdir(srcdir) and ctx.hasdir(dstdir):
            # directory wasn't entirely moved locally
            ruledout.add(srcdir)
        elif dirmove.get(srcdir, dstdir) != dstdir:
            # files from the same directory moved to two different places
            ruledout.add(srcdir)
        else:
            # looks good so far
            dirmove[srcdir] = dstdir

    for d in ruledout:
        dirmove.pop(d, None)
    del ruledout

    if not dirmove:
        return {}, {}

    # normalize to trailing-slash form so the prefix checks below are exact
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    # check unaccounted nonoverlapping files against directory moves
    movewithdir = {}
    for f in addedfilesfn():
        if f in fullcopy:
            continue
        for d in dirmove:
            if not f.startswith(d):
                continue
            # new file added in a directory that was moved, move it
            df = dirmove[d] + f[len(d) :]
            if df not in copy:
                movewithdir[f] = df
                repo.ui.debug(
                    b"   pending file src: '%s' -> dst: '%s'\n"
                    % (f, df)
                )
            break

    return dirmove, movewithdir
1069
1069
1070
1070
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # working-copy contexts have no revision number; use their first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # copies recorded between base and c2 whose source still exists in c1
    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # A file is "missing" if it isn't present in the destination, but is
    # present in the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = [
        f for f in changedfiles if f not in m1 and f in base and f in c2
    ]

    copies1 = {}
    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        # index files added on the c1 side by basename and by directory
        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics;
        # config value to limit the number of candidate moves to check.
        # Loop-invariant, so read it once instead of per missing file.
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1178
1178
1179
1179
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True  # trivially the same file

    anc1 = f1.ancestors()
    anc2 = f2.ancestors()
    try:
        # workingfilectx has a linkrev of None; step past it before the
        # integer comparisons below
        if f1.linkrev() is None:
            f1 = next(anc1)
        if f2.linkrev() is None:
            f2 = next(anc2)

        # walk the higher-linkrev side backwards until the linkrevs meet
        while True:
            rev1 = f1.linkrev()
            rev2 = f2.linkrev()
            if rev1 == rev2:
                # both point into the same linkrev: related iff they are
                # the very same file
                return f1 == f2
            if rev1 > rev2:
                f1 = next(anc1)
            else:
                f2 = next(anc2)
    except StopIteration:
        # one ancestor chain ran out: no common origin
        return False
1211
1211
1212
1212
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    new_copies = pathcopies(base, ctx)
    parent = wctx.p1()
    _filter(parent, wctx, new_copies)
    # Extra filtering to drop copy information for files that existed before
    # the graft. This is to handle the case of grafting a rename onto a commit
    # that already has the rename. Otherwise the presence of copy information
    # would result in the creation of an empty commit where we would prefer to
    # not create one.
    preexisting = [dst for dst in new_copies if dst in parent]
    for dst in preexisting:
        del new_copies[dst]
    for dst, src in pycompat.iteritems(new_copies):
        wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now