copies: no longer cache the ChangedFiles during copy tracing...
marmoute
r46765:a132aa59 default
@@ -1,1263 +1,1223 @@
# coding: utf8
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import os

from .i18n import _
from .node import (
    nullid,
    nullrev,
)

from . import (
    match as matchmod,
    pathutil,
    policy,
    pycompat,
    util,
)


from .utils import stringutil

from .revlogutils import (
    flagutil,
    sidedata as sidedatamod,
)

rustmod = policy.importrust("copy_tracing")


def _filter(src, dst, t):
    """filters out invalid copies after chaining"""

    # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
    # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
    # in the following table (not including trivial cases). For example, case 2
    # is where a file existed in 'src' and remained under that name in 'mid' and
    # then was renamed between 'mid' and 'dst'.
    #
    # case  src  mid  dst  result
    #   1    x    y    -     -
    #   2    x    y    y    x->y
    #   3    x    y    x     -
    #   4    x    y    z    x->z
    #   5    -    x    y     -
    #   6    x    x    y    x->y
    #
    # _chain() takes care of chaining the copies in 'a' and 'b', but it
    # cannot tell the difference between cases 1 and 2, between 3 and 4, or
    # between 5 and 6, so it includes all cases in its result.
    # Cases 1, 3, and 5 are then removed by _filter().

    for k, v in list(t.items()):
        # remove copies from files that didn't exist
        if v not in src:
            del t[k]
        # remove criss-crossed copies
        elif k in src and v in dst:
            del t[k]
        # remove copies to files that were then removed
        elif k not in dst:
            del t[k]


def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    result = prefix.copy()
    for key, value in pycompat.iteritems(suffix):
        result[key] = prefix.get(value, value)
    return result


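# A minimal sketch of how _chain() and _filter() cooperate (hypothetical
# mappings, not part of the module). Chaining composes two {dst: src} legs;
# filtering then drops invalid entries such as case 3 from the table above:
#
#     copies_a = {b'mid': b'src'}        # first leg: 'src' renamed to 'mid'
#     copies_b = {b'dst': b'mid'}        # second leg: 'mid' renamed to 'dst'
#     t = _chain(copies_a, copies_b)     # {b'mid': b'src', b'dst': b'src'}
#     # if 'mid' no longer exists in the destination manifest, _filter()
#     # removes the stale b'mid' entry, leaving {b'dst': b'src'}.
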
def _tracefile(fctx, am, basemf):
    """return the path of the ancestor of fctx present in ancestor
    manifest am

    Note: we used to try to stop after a given limit; however, checking if
    that limit was reached turned out to be very expensive, so we are better
    off disabling that feature."""

    for f in fctx.ancestors():
        path = f.path()
        if am.get(path, None) == f.filenode():
            return path
        if basemf and basemf.get(path, None) == f.filenode():
            return path


def _dirstatecopies(repo, match=None):
    ds = repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in b'anm' or (match and not match(k)):
            del c[k]
    return c


def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)


def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource


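# For reference, a configuration sketch (key and values quoted from the
# checks in usechangesetcentricalgo() above; the hgrc stanza itself is
# illustrative):
#
#     [experimental]
#     copies.read-from = changeset-only   # or: compatibility, filelog-only
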
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
            if debug:
                dbg(
                    b'debug.copies: time: %f seconds\n'
                    % (util.timer() - start)
                )
    return cm


def _revinfo_getter(repo, match):
    """returns a function that returns the following data given a <rev>:

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object
    """
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

-    # A small cache to avoid doing the work twice for merges
-    #
-    # In the vast majority of cases, if we ask information for a revision
-    # about 1 parent, we'll later ask it for the other. So it make sense to
-    # keep the information around when reaching the first parent of a merge
-    # and dropping it after it was provided for the second parents.
-    #
-    # It exists cases were only one parent of the merge will be walked. It
-    # happens when the "destination" the copy tracing is descendant from a
-    # new root, not common with the "source". In that case, we will only walk
-    # through merge parents that are descendant of changesets common
-    # between "source" and "destination".
-    #
-    # With the current case implementation if such changesets have a copy
-    # information, we'll keep them in memory until the end of
-    # _changesetforwardcopies. We don't expect the case to be frequent
-    # enough to matters.
-    #
-    # In addition, it would be possible to reach pathological case, were
-    # many first parent are met before any second parent is reached. In
-    # that case the cache could grow. If this even become an issue one can
-    # safely introduce a maximum cache size. This would trade extra CPU/IO
-    # time to save memory.
-    merge_caches = {}
-
    alwaysmatch = match.always()

    if rustmod is not None and alwaysmatch:

        def revinfo(rev):
            p1, p2 = parents(rev)
-            value = None
-            e = merge_caches.pop(rev, None)
-            if e is not None:
-                return e
            if flags(rev) & HASCOPIESINFO:
                raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
            else:
                raw = None
-            value = (p1, p2, raw)
-            if p1 != nullrev and p2 != nullrev:
-                # XXX some case we over cache, IGNORE
-                merge_caches[rev] = value
-            return value
+            return (p1, p2, raw)

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
-            value = None
-            e = merge_caches.pop(rev, None)
-            if e is not None:
-                return e
-            changes = None
            if flags(rev) & HASCOPIESINFO:
                changes = changelogrevision(rev).changes
-            value = (p1, p2, changes)
-            if p1 != nullrev and p2 != nullrev:
-                # XXX some case we over cache, IGNORE
-                merge_caches[rev] = value
-            return value
+            else:
+                changes = None
+            return (p1, p2, changes)

    return revinfo


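# A small usage sketch of the getter above (assumed revision numbers): the
# returned closure is all that copy tracing needs per changeset.
#
#     revinfo = _revinfo_getter(repo, match)
#     p1, p2, changes = revinfo(some_rev)
#     # `changes` is None when the revision lacks the HASCOPIESINFO flag
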
def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor"""
    cache = {}

    def _is_ancestor(anc, desc):
        if anc > desc:
            return False
        elif anc == desc:
            return True
        key = (anc, desc)
        ret = cache.get(key)
        if ret is None:
            ret = cache[key] = is_ancestor(anc, desc)
        return ret

    return _is_ancestor


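# A minimal sketch of the memoization above (hypothetical revisions):
#
#     is_ancestor = cached_is_ancestor(repo.changelog.isancestorrev)
#     is_ancestor(2, 7)   # computed once, then stored in `cache`
#     is_ancestor(2, 7)   # served from `cache`
#     is_ancestor(7, 2)   # False immediately: ancestors have smaller revs
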
def _changesetforwardcopies(a, b, match):
    if a.rev() in (nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev

    # To track a rename from "A" to "B", we need to gather all parent → child
    # edges that are contained in `::B` but not in `::A`.
    #
    #
    # To do so, we need to gather all revisions exclusive¹ to "B" (i.e.¹:
    # `::b - ::a`) and also all the "root points", i.e. the parents of the
    # exclusive set that belong to ::a. These are exactly all the revisions
    # needed to express the parent → child edges we need to combine.
    #
    # [1] actually, we need to gather all the edges within `(::a)::b`, i.e.:
    # excluding paths that lead to roots that are not ancestors of `a`. We
    # keep this out of the explanation because it is hard enough without
    # this special case.

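    # An equivalent revset formulation (an illustrative aside only; the
    # code below does not materialize these revsets):
    #
    #     revs  = repo.revs(b'only(%d, %d)', b.rev(), a.rev())
    #     roots = repo.revs(b'parents(only(%d, %d)) and ::%d',
    #                       b.rev(), a.rev(), a.rev())
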
    parents = cl._uncheckedparentrevs
    graph_roots = (nullrev, nullrev)

    ancestors = cl.ancestors([a.rev()], inclusive=True)
    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    roots = set()
    has_graph_roots = False

    # iterate over `only(B, A)`
    for r in revs:
        ps = parents(r)
        if ps == graph_roots:
            has_graph_roots = True
        else:
            p1, p2 = ps

            # find all the "root points" (see larger comment above)
            if p1 != nullrev and p1 in ancestors:
                roots.add(p1)
            if p2 != nullrev and p2 in ancestors:
                roots.add(p2)
    if not roots:
        # no common revision to track copies from
        return {}
    if has_graph_roots:
        # this deals with the special case mentioned in the [1] footnote. We
        # must filter out revisions that lead to non-common graphroots.
        roots = list(roots)
        m = min(roots)
        h = [b.rev()]
        roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
        roots_to_head = set(roots_to_head)
        revs = [r for r in revs if r in roots_to_head]

    if repo.filecopiesmode == b'changeset-sidedata':
        # When using side-data, we will process the edges "from" the children.
        # We iterate over the children, gathering previously collected data
        # for the parents. To know when the parent data is no longer
        # necessary, we keep a counter of how many children each revision has.
        #
        # An interesting property of `children_count` is that it only
        # contains revisions that will be relevant for an edge of the graph.
        # So if a child has a parent not in `children_count`, that edge
        # should not be processed.
        children_count = dict((r, 0) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children_count[r] = 0
                if p in children_count:
                    children_count[p] += 1
        revinfo = _revinfo_getter(repo, match)
        return _combine_changeset_copies(
            revs, children_count, b.rev(), revinfo, match, isancestor
        )
    else:
        # When not using side-data, we will process the edges "from" the
        # parent, so we need a full mapping of the parent -> children
        # relation.
        children = dict((r, []) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children[r] = []
                if p in children:
                    children[p].append(r)
        x = revs.pop()
        assert x == b.rev()
        revs.extend(roots)
        revs.sort()

        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )


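# A small worked example of the `children_count` bookkeeping above
# (hypothetical graph, not part of the module): with edges 0 <- 1 <- 2,
# 0 <- 3 and a merge 4 = (2, 3), roots == {0}, and walking
# revs == [1, 2, 3, 4] yields
#
#     children_count == {0: 2, 1: 1, 2: 1, 3: 1, 4: 0}
#
# so _combine_changeset_copies() can pop a parent's copy data as soon as
# its counter of still-unvisited children drops to zero.
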
def _combine_changeset_copies(
    revs, children_count, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revisions to visit
    children_count: a {parent: <number-of-relevant-children>} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that returns (p1, p2, changes)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    if rustmod is not None and alwaysmatch:
        return rustmod.combine_changeset_copies(
            list(revs), children_count, targetrev, revinfo, isancestor
        )

    isancestor = cached_is_ancestor(isancestor)

    all_copies = {}
    # iterate over all the "children" side of copy tracing "edge"
    for current_rev in revs:
        p1, p2, changes = revinfo(current_rev)
        current_copies = None

        # iterate over all parents to chain the existing data with the
        # data from the parent → child edge.
        for parent, parent_rev in ((1, p1), (2, p2)):
            if parent_rev == nullrev:
                continue
            remaining_children = children_count.get(parent_rev)
            if remaining_children is None:
                continue
            remaining_children -= 1
            children_count[parent_rev] = remaining_children
            if remaining_children:
                copies = all_copies.get(parent_rev, None)
            else:
                copies = all_copies.pop(parent_rev, None)

            if copies is None:
                # this is a root
                copies = {}

            newcopies = copies
            # chain the data in the edge with the existing data
            if changes is not None:
                childcopies = {}
                if parent == 1:
                    childcopies = changes.copied_from_p1
                elif parent == 2:
                    childcopies = changes.copied_from_p2

                if not alwaysmatch:
                    childcopies = {
                        dst: src
                        for dst, src in childcopies.items()
                        if match(dst)
                    }
                if childcopies:
                    newcopies = copies.copy()
                    for dest, source in pycompat.iteritems(childcopies):
                        prev = copies.get(source)
                        if prev is not None and prev[1] is not None:
                            source = prev[1]
                        newcopies[dest] = (current_rev, source)
                    assert newcopies is not copies
                if changes.removed:
                    if newcopies is copies:
                        newcopies = copies.copy()
                    for f in changes.removed:
                        if f in newcopies:
                            if newcopies is copies:
                                # copy on write to avoid affecting potential other
                                # branches. when there are no other branches, this
                                # could be avoided.
                                newcopies = copies.copy()
                            newcopies[f] = (current_rev, None)

            # check potential need to combine the data from another parent (for
            # that child). See comment below for details.
            if current_copies is None:
                current_copies = newcopies
            elif current_copies is newcopies:
                # nothing to merge:
                pass
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 takes precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regard to
                # potential filelog-related behavior.
                assert parent == 2
                current_copies = _merge_copies_dict(
                    newcopies, current_copies, isancestor, changes
                )
        all_copies[current_rev] = current_copies

    # filter out internal details and return a {dest: source} mapping
    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies


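# Shape of the intermediate data built above (an informal sketch): each
# per-revision mapping records, for every destination path, the revision
# that introduced the information and the traced source, with None once
# the file was removed:
#
#     all_copies[rev] == {
#         b'renamed-to': (introducing_rev, b'renamed-from'),
#         b'deleted-file': (removing_rev, None),
#     }
#
# Only entries whose source is not None survive into `final_copies`.
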
def _merge_copies_dict(minor, major, isancestor, changes):
    """merge two copies mappings together, minor and major

    In case of conflict, value from "major" will be picked.

    - `isancestor(low_rev, high_rev)`: callable returning True if `low_rev`
      is an ancestor of `high_rev`,

    - `ismerged(path)`: callable returning True if `path` has been merged in
      the current revision,

    return the resulting dict (in practice, the "minor" object, updated)
    """
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            minor[dest] = value
        else:
            new_tt = value[0]
            other_tt = other[0]
            if value[1] == other[1]:
                continue
            # content from "major" wins, unless it is older
            # than the branch point or there is a merge
            if new_tt == other_tt:
                minor[dest] = value
            elif (
                changes is not None
                and value[1] is None
                and dest in changes.salvaged
            ):
                pass
            elif (
                changes is not None
                and other[1] is None
                and dest in changes.salvaged
            ):
                minor[dest] = value
            elif changes is not None and dest in changes.merged:
                minor[dest] = value
            elif not isancestor(new_tt, other_tt):
                if value[1] is not None:
                    minor[dest] = value
            elif isancestor(other_tt, new_tt):
                minor[dest] = value
    return minor


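# A condensed restatement of the conflict rules above (informal, in the
# same order the code checks them), for a destination present in both
# mappings with differing sources:
#
#     same revision on both sides           -> major wins
#     major deleted it, but it is salvaged  -> minor kept
#     minor deleted it, but it is salvaged  -> major wins
#     destination merged in this revision   -> major wins
#     major's rev not ancestor of minor's   -> major wins, unless a deletion
#     minor's rev ancestor of major's       -> major wins
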
def _revinfo_getter_extra(repo):
    """return a function that returns multiple data items given a <rev>:

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            parents = fctx._filelog.parents(fctx._filenode)
            nb_parents = 0
            for n in parents:
                if n != nullid:
                    nb_parents += 1
            return nb_parents >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo


def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information"""
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 takes precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regard to
                # potential filelog-related behavior.
                if parent == 1:
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                    all_copies[c] = newcopies

    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies


def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
    """version of `_merge_copies_dict` that works with the Google
    specific "extra" based storage for copy information"""
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            minor[dest] = value
        else:
            new_tt = value[0]
            other_tt = other[0]
            if value[1] == other[1]:
                continue
            # content from "major" wins, unless it is older
            # than the branch point or there is a merge
            if (
                new_tt == other_tt
                or not isancestor(new_tt, other_tt)
                or ismerged(dest)
            ):
                minor[dest] = value


def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies


def _backwardrenames(a, b, match):
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r


def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies


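# A typical call of the dispatcher above (hypothetical revisions and
# paths): the result is always a {destination: source} mapping between
# the two contexts.
#
#     copies = pathcopies(repo[b'1.0'], repo[b'tip'])
#     # e.g. {b'new/name.py': b'old/name.py'} for a renamed file
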
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc. to
    merge files that were moved/copied in one merge parent and modified in
    another. For example:

    o          ---> 4 another commit
    |
    | o        ---> 3 commit that modifies a.txt
    | /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on in such
        # cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)


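# The dispatch above is driven by configuration along these lines (keys
# quoted from mergecopies() and _isfullcopytraceable(); the values shown
# are illustrative):
#
#     [experimental]
#     copytrace = heuristics              # or: off
#     copytrace.sourcecommitlimit = 100   # cap for full tracing above
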
def _isfullcopytraceable(repo, c1, base):
    """Checks whether base, source and destination are all non-public
    branches; if yes, we use the full copytrace algorithm for increased
    capabilities, since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    the number of changesets from c1 to base such that if the number of
    changesets is more than the limit, the full copytracing algorithm won't
    be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint(
            b'experimental', b'copytrace.sourcecommitlimit'
        )
        commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False


def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif src not in mb:
        # Work around the "short-circuit to avoid issues with merge states"
        # thing in pathcopies(): pathcopies(x, y) can return a copy where the
        # destination doesn't exist in y.
        pass
    elif mb[src] != m2[src] and not _related(c2[src], base[src]):
        return
    elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
        # modified on side 2
        for dst in dsts1:
            copy[dst] = src


class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source, present in one context but not the other,
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        self.copy = {} if copy is None else copy
        self.renamedelete = {} if renamedelete is None else renamedelete
        self.dirmove = {} if dirmove is None else dirmove
        self.movewithdir = {} if movewithdir is None else movewithdir

    def __repr__(self):
        return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )


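# A minimal sketch of how this container is filled and read (hypothetical
# paths; mirrors what the tracing functions below return per side):
#
#     bc = branch_copies(copy={b'b.txt': b'a.txt'})
#     bc.renamedelete[b'gone.txt'] = [b'kept-as.txt']
#     # merge code then consults bc.copy / bc.movewithdir / bc.dirmove
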
def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm, which finds all the new files that were
    added from the merge base up to the top commit and, for each such file,
    checks whether it was copied from another file.

    This is pretty slow when a lot of changesets are involved, but it will
    track all the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return branch_copies(), branch_copies(), {}

    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)
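    # For example (hypothetical paths), copies1 = {b'b': b'a', b'c': b'a'}
    # inverts to inversecopies1 = {b'a': [b'b', b'c']}, grouping every
    # destination of a given source so divergence can be detected per source.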

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    cache = []

    def _get_addedfiles(idx):
        if not cache:
            addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
            addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
            u1 = sorted(addedinm1 - addedinm2)
            u2 = sorted(addedinm2 - addedinm1)
            cache.extend((u1, u2))
        return cache[idx]

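    # Both lambdas below share the mutable `cache` list, so the (potentially
    # expensive) filesnotin() computation runs at most once, and only if a
    # caller actually asks for the added-files sets.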
    u1fn = lambda: _get_addedfiles(0)
    u2fn = lambda: _get_addedfiles(1)
    if repo.ui.debugflag:
        u1 = u1fn()
        u2 = u2fn()

        header = b"  unmatched files in %s"
        if u1:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1))
            )
        if u2:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2))
            )

        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b"  on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b"   src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge


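# Minimal sketch of the per-source classification above, reduced to plain
# dicts and sets for illustration. The paths and the src_in_m1/src_in_m2
# flags (standing in for real manifest membership tests) are hypothetical.
def _example_classify_copies():
    inversecopies1 = {b'a': [b'b', b'c']}  # side 1: a -> b, a -> c
    inversecopies2 = {b'a': [b'c', b'd']}  # side 2: a -> c, a -> d
    src_in_m1 = src_in_m2 = False          # 'a' was deleted on both sides
    copy1, copy2, diverge = {}, {}, {}
    for src in set(inversecopies1) | set(inversecopies2):
        dsts1 = set(inversecopies1.get(src, ()))
        dsts2 = set(inversecopies2.get(src, ()))
        if dsts1 and dsts2 and not src_in_m1 and not src_in_m2:
            if dsts1 & dsts2:
                # overlapping destination (b'c') -> treated as a plain copy
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            else:
                diverge[src] = sorted(dsts1 | dsts2)
    return copy1, copy2, diverge  # here: ({b'c': b'a'}, {b'c': b'a'}, {})

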
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfilesfn: function returning added files on the other side (compared to
    ctx)
    """
    # generate a directory move map
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del invalid

    if not dirmove:
        return {}, {}

    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
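    # Trailing slashes make the startswith() prefix checks below unambiguous:
    # e.g. {b'd1': b'd2'} becomes {b'd1/': b'd2/'}, so a move of b'd1/' no
    # longer matches an unrelated sibling like b'd1x/f' (hypothetical paths).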

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfilesfn():
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return dirmove, movewithdir


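# Minimal sketch of the directory-move detection above on plain data, for
# illustration. `remaining_dirs` stands in for ctx.hasdir() (directories that
# still exist on this side); all paths are hypothetical.
def _example_dir_renames():
    fullcopy = {b'd2/a': b'd1/a', b'd2/b': b'd1/b'}  # dst -> src
    remaining_dirs = {b'd2'}                         # d1 was entirely moved
    invalid, dirmove = set(), {}
    for dst, src in fullcopy.items():
        dsrc = src.rsplit(b'/', 1)[0]
        ddst = dst.rsplit(b'/', 1)[0]
        if dsrc in invalid:
            continue
        elif dsrc in remaining_dirs and ddst in remaining_dirs:
            invalid.add(dsrc)  # source dir was not entirely moved
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            invalid.add(dsrc)  # moved to two different places
        else:
            dirmove[dsrc] = ddst
    dirmove = {k: v for k, v in dirmove.items() if k not in invalid}
    return dirmove  # here: {b'd1': b'd2'}

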
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of the following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    The source branch is the set of commits from base up to c2, not
    including base.

    If a merge is involved, it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by the heuristics can be
    very numerous, which makes the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit`, which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # a file is missing if it isn't present in the destination, but is present
    # in the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    copies1 = {}
    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)
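        # e.g. a (hypothetical) file b'd2/x.c' added on the c1 side is indexed
        # under both its basename b'x.c' and its dirname b'd2', the two keys
        # the heuristics below use to find move candidates.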

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates, which can slow down the
            # heuristics; a config value limits the number of candidate
            # moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}


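# Minimal sketch of generating move candidates for one missing file from the
# two indexes above, for illustration; all paths are hypothetical.
def _example_movecandidates():
    added = [b'd2/x.c', b'd2/y.c', b'other/x.c']  # files new on side 1
    missing = b'd1/x.c'                           # deleted on side 1
    basenametofilename = collections.defaultdict(list)
    dirnametofilename = collections.defaultdict(list)
    for f in added:
        basenametofilename[os.path.basename(f)].append(f)
        dirnametofilename[os.path.dirname(f)].append(f)
    candidates = (
        basenametofilename[os.path.basename(missing)]
        + dirnametofilename[os.path.dirname(missing)]
    )
    # here: [b'd2/x.c', b'other/x.c'] -- the files sharing the basename;
    # nothing was added in b'd1', so the dirname index contributes nothing
    return candidates

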
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True  # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else:  # f1 and f2 point to files in the same linkrev
                return f1 == f2  # true if they point to the same file
    except StopIteration:
        return False


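# Minimal sketch of the two-pointer walk in _related() on plain revision
# numbers, for illustration. Each list stands in for a filelog ancestor
# chain's linkrevs, newest first; the walk advances whichever side is newer.
def _example_related_walk():
    revs1 = iter([7, 5, 2])  # hypothetical linkrevs of f1's ancestors
    revs2 = iter([6, 4, 2])  # hypothetical linkrevs of f2's ancestors
    try:
        r1, r2 = next(revs1), next(revs2)
        while True:
            if r1 > r2:
                r1 = next(revs1)
            elif r2 > r1:
                r2 = next(revs2)
            else:
                return True  # both chains reach rev 2: related
    except StopIteration:
        return False  # one chain ran out first: unrelated

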
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    new_copies = pathcopies(base, ctx)
    _filter(wctx.p1(), wctx, new_copies)
    for dst, src in pycompat.iteritems(new_copies):
        wctx[dst].markcopied(src)
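
# Usage note (illustrative, following the docstring above): a graft of `ctx`
# onto the working copy would typically call graftcopies(wctx, ctx, base)
# with base == ctx.p1() after merge.update() has run, so that any renames
# merge.update() missed still get recorded via markcopied().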