##// END OF EJS Templates
copies: make two version of the changeset centric algorithm...
marmoute -
r46214:ad6ebb6f default
parent child Browse files
Show More
@@ -1,997 +1,1095 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import os
11 import os
12
12
13 from .i18n import _
13 from .i18n import _
14
14
15
15
16 from .revlogutils.flagutil import REVIDX_SIDEDATA
16 from .revlogutils.flagutil import REVIDX_SIDEDATA
17
17
18 from . import (
18 from . import (
19 match as matchmod,
19 match as matchmod,
20 node,
20 node,
21 pathutil,
21 pathutil,
22 pycompat,
22 pycompat,
23 util,
23 util,
24 )
24 )
25
25
26
26
27 from .utils import stringutil
27 from .utils import stringutil
28
28
29
29
30 def _filter(src, dst, t):
30 def _filter(src, dst, t):
31 """filters out invalid copies after chaining"""
31 """filters out invalid copies after chaining"""
32
32
33 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
33 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
34 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
34 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
35 # in the following table (not including trivial cases). For example, case 2
35 # in the following table (not including trivial cases). For example, case 2
36 # is where a file existed in 'src' and remained under that name in 'mid' and
36 # is where a file existed in 'src' and remained under that name in 'mid' and
37 # then was renamed between 'mid' and 'dst'.
37 # then was renamed between 'mid' and 'dst'.
38 #
38 #
39 # case src mid dst result
39 # case src mid dst result
40 # 1 x y - -
40 # 1 x y - -
41 # 2 x y y x->y
41 # 2 x y y x->y
42 # 3 x y x -
42 # 3 x y x -
43 # 4 x y z x->z
43 # 4 x y z x->z
44 # 5 - x y -
44 # 5 - x y -
45 # 6 x x y x->y
45 # 6 x x y x->y
46 #
46 #
47 # _chain() takes care of chaining the copies in 'a' and 'b', but it
47 # _chain() takes care of chaining the copies in 'a' and 'b', but it
48 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
48 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
49 # between 5 and 6, so it includes all cases in its result.
49 # between 5 and 6, so it includes all cases in its result.
50 # Cases 1, 3, and 5 are then removed by _filter().
50 # Cases 1, 3, and 5 are then removed by _filter().
51
51
52 for k, v in list(t.items()):
52 for k, v in list(t.items()):
53 # remove copies from files that didn't exist
53 # remove copies from files that didn't exist
54 if v not in src:
54 if v not in src:
55 del t[k]
55 del t[k]
56 # remove criss-crossed copies
56 # remove criss-crossed copies
57 elif k in src and v in dst:
57 elif k in src and v in dst:
58 del t[k]
58 del t[k]
59 # remove copies to files that were then removed
59 # remove copies to files that were then removed
60 elif k not in dst:
60 elif k not in dst:
61 del t[k]
61 del t[k]
62
62
63
63
64 def _chain(prefix, suffix):
64 def _chain(prefix, suffix):
65 """chain two sets of copies 'prefix' and 'suffix'"""
65 """chain two sets of copies 'prefix' and 'suffix'"""
66 result = prefix.copy()
66 result = prefix.copy()
67 for key, value in pycompat.iteritems(suffix):
67 for key, value in pycompat.iteritems(suffix):
68 result[key] = prefix.get(value, value)
68 result[key] = prefix.get(value, value)
69 return result
69 return result
70
70
71
71
72 def _tracefile(fctx, am, basemf):
72 def _tracefile(fctx, am, basemf):
73 """return file context that is the ancestor of fctx present in ancestor
73 """return file context that is the ancestor of fctx present in ancestor
74 manifest am
74 manifest am
75
75
76 Note: we used to try and stop after a given limit, however checking if that
76 Note: we used to try and stop after a given limit, however checking if that
77 limit is reached turned out to be very expensive. we are better off
77 limit is reached turned out to be very expensive. we are better off
78 disabling that feature."""
78 disabling that feature."""
79
79
80 for f in fctx.ancestors():
80 for f in fctx.ancestors():
81 path = f.path()
81 path = f.path()
82 if am.get(path, None) == f.filenode():
82 if am.get(path, None) == f.filenode():
83 return path
83 return path
84 if basemf and basemf.get(path, None) == f.filenode():
84 if basemf and basemf.get(path, None) == f.filenode():
85 return path
85 return path
86
86
87
87
88 def _dirstatecopies(repo, match=None):
88 def _dirstatecopies(repo, match=None):
89 ds = repo.dirstate
89 ds = repo.dirstate
90 c = ds.copies().copy()
90 c = ds.copies().copy()
91 for k in list(c):
91 for k in list(c):
92 if ds[k] not in b'anm' or (match and not match(k)):
92 if ds[k] not in b'anm' or (match and not match(k)):
93 del c[k]
93 del c[k]
94 return c
94 return c
95
95
96
96
97 def _computeforwardmissing(a, b, match=None):
97 def _computeforwardmissing(a, b, match=None):
98 """Computes which files are in b but not a.
98 """Computes which files are in b but not a.
99 This is its own function so extensions can easily wrap this call to see what
99 This is its own function so extensions can easily wrap this call to see what
100 files _forwardcopies is about to process.
100 files _forwardcopies is about to process.
101 """
101 """
102 ma = a.manifest()
102 ma = a.manifest()
103 mb = b.manifest()
103 mb = b.manifest()
104 return mb.filesnotin(ma, match=match)
104 return mb.filesnotin(ma, match=match)
105
105
106
106
107 def usechangesetcentricalgo(repo):
107 def usechangesetcentricalgo(repo):
108 """Checks if we should use changeset-centric copy algorithms"""
108 """Checks if we should use changeset-centric copy algorithms"""
109 if repo.filecopiesmode == b'changeset-sidedata':
109 if repo.filecopiesmode == b'changeset-sidedata':
110 return True
110 return True
111 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
111 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
112 changesetsource = (b'changeset-only', b'compatibility')
112 changesetsource = (b'changeset-only', b'compatibility')
113 return readfrom in changesetsource
113 return readfrom in changesetsource
114
114
115
115
116 def _committedforwardcopies(a, b, base, match):
116 def _committedforwardcopies(a, b, base, match):
117 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
117 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
118 # files might have to be traced back to the fctx parent of the last
118 # files might have to be traced back to the fctx parent of the last
119 # one-side-only changeset, but not further back than that
119 # one-side-only changeset, but not further back than that
120 repo = a._repo
120 repo = a._repo
121
121
122 if usechangesetcentricalgo(repo):
122 if usechangesetcentricalgo(repo):
123 return _changesetforwardcopies(a, b, match)
123 return _changesetforwardcopies(a, b, match)
124
124
125 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
125 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
126 dbg = repo.ui.debug
126 dbg = repo.ui.debug
127 if debug:
127 if debug:
128 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
128 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
129 am = a.manifest()
129 am = a.manifest()
130 basemf = None if base is None else base.manifest()
130 basemf = None if base is None else base.manifest()
131
131
132 # find where new files came from
132 # find where new files came from
133 # we currently don't try to find where old files went, too expensive
133 # we currently don't try to find where old files went, too expensive
134 # this means we can miss a case like 'hg rm b; hg cp a b'
134 # this means we can miss a case like 'hg rm b; hg cp a b'
135 cm = {}
135 cm = {}
136
136
137 # Computing the forward missing is quite expensive on large manifests, since
137 # Computing the forward missing is quite expensive on large manifests, since
138 # it compares the entire manifests. We can optimize it in the common use
138 # it compares the entire manifests. We can optimize it in the common use
139 # case of computing what copies are in a commit versus its parent (like
139 # case of computing what copies are in a commit versus its parent (like
140 # during a rebase or histedit). Note, we exclude merge commits from this
140 # during a rebase or histedit). Note, we exclude merge commits from this
141 # optimization, since the ctx.files() for a merge commit is not correct for
141 # optimization, since the ctx.files() for a merge commit is not correct for
142 # this comparison.
142 # this comparison.
143 forwardmissingmatch = match
143 forwardmissingmatch = match
144 if b.p1() == a and b.p2().node() == node.nullid:
144 if b.p1() == a and b.p2().node() == node.nullid:
145 filesmatcher = matchmod.exact(b.files())
145 filesmatcher = matchmod.exact(b.files())
146 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
146 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
147 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
147 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
148
148
149 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
149 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
150
150
151 if debug:
151 if debug:
152 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
152 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
153
153
154 for f in sorted(missing):
154 for f in sorted(missing):
155 if debug:
155 if debug:
156 dbg(b'debug.copies: tracing file: %s\n' % f)
156 dbg(b'debug.copies: tracing file: %s\n' % f)
157 fctx = b[f]
157 fctx = b[f]
158 fctx._ancestrycontext = ancestrycontext
158 fctx._ancestrycontext = ancestrycontext
159
159
160 if debug:
160 if debug:
161 start = util.timer()
161 start = util.timer()
162 opath = _tracefile(fctx, am, basemf)
162 opath = _tracefile(fctx, am, basemf)
163 if opath:
163 if opath:
164 if debug:
164 if debug:
165 dbg(b'debug.copies: rename of: %s\n' % opath)
165 dbg(b'debug.copies: rename of: %s\n' % opath)
166 cm[f] = opath
166 cm[f] = opath
167 if debug:
167 if debug:
168 dbg(
168 dbg(
169 b'debug.copies: time: %f seconds\n'
169 b'debug.copies: time: %f seconds\n'
170 % (util.timer() - start)
170 % (util.timer() - start)
171 )
171 )
172 return cm
172 return cm
173
173
174
174
175 def _revinfo_getter(repo):
175 def _revinfo_getter(repo):
176 """return a function that return multiple data given a <rev>"i
176 """return a function that return multiple data given a <rev>"i
177
177
178 * p1: revision number of first parent
178 * p1: revision number of first parent
179 * p2: revision number of first parent
179 * p2: revision number of first parent
180 * p1copies: mapping of copies from p1
180 * p1copies: mapping of copies from p1
181 * p2copies: mapping of copies from p2
181 * p2copies: mapping of copies from p2
182 * removed: a list of removed files
182 * removed: a list of removed files
183 * ismerged: a callback to know if file was merged in that revision
183 * ismerged: a callback to know if file was merged in that revision
184 """
184 """
185 cl = repo.changelog
185 cl = repo.changelog
186 parents = cl.parentrevs
186 parents = cl.parentrevs
187
187
188 def get_ismerged(rev):
188 def get_ismerged(rev):
189 ctx = repo[rev]
189 ctx = repo[rev]
190
190
191 def ismerged(path):
191 def ismerged(path):
192 if path not in ctx.files():
192 if path not in ctx.files():
193 return False
193 return False
194 fctx = ctx[path]
194 fctx = ctx[path]
195 parents = fctx._filelog.parents(fctx._filenode)
195 parents = fctx._filelog.parents(fctx._filenode)
196 nb_parents = 0
196 nb_parents = 0
197 for n in parents:
197 for n in parents:
198 if n != node.nullid:
198 if n != node.nullid:
199 nb_parents += 1
199 nb_parents += 1
200 return nb_parents >= 2
200 return nb_parents >= 2
201
201
202 return ismerged
202 return ismerged
203
203
204 if repo.filecopiesmode == b'changeset-sidedata':
204 if repo.filecopiesmode == b'changeset-sidedata':
205 changelogrevision = cl.changelogrevision
205 changelogrevision = cl.changelogrevision
206 flags = cl.flags
206 flags = cl.flags
207
207
208 # A small cache to avoid doing the work twice for merges
208 # A small cache to avoid doing the work twice for merges
209 #
209 #
210 # In the vast majority of cases, if we ask information for a revision
210 # In the vast majority of cases, if we ask information for a revision
211 # about 1 parent, we'll later ask it for the other. So it make sense to
211 # about 1 parent, we'll later ask it for the other. So it make sense to
212 # keep the information around when reaching the first parent of a merge
212 # keep the information around when reaching the first parent of a merge
213 # and dropping it after it was provided for the second parents.
213 # and dropping it after it was provided for the second parents.
214 #
214 #
215 # It exists cases were only one parent of the merge will be walked. It
215 # It exists cases were only one parent of the merge will be walked. It
216 # happens when the "destination" the copy tracing is descendant from a
216 # happens when the "destination" the copy tracing is descendant from a
217 # new root, not common with the "source". In that case, we will only walk
217 # new root, not common with the "source". In that case, we will only walk
218 # through merge parents that are descendant of changesets common
218 # through merge parents that are descendant of changesets common
219 # between "source" and "destination".
219 # between "source" and "destination".
220 #
220 #
221 # With the current case implementation if such changesets have a copy
221 # With the current case implementation if such changesets have a copy
222 # information, we'll keep them in memory until the end of
222 # information, we'll keep them in memory until the end of
223 # _changesetforwardcopies. We don't expect the case to be frequent
223 # _changesetforwardcopies. We don't expect the case to be frequent
224 # enough to matters.
224 # enough to matters.
225 #
225 #
226 # In addition, it would be possible to reach pathological case, were
226 # In addition, it would be possible to reach pathological case, were
227 # many first parent are met before any second parent is reached. In
227 # many first parent are met before any second parent is reached. In
228 # that case the cache could grow. If this even become an issue one can
228 # that case the cache could grow. If this even become an issue one can
229 # safely introduce a maximum cache size. This would trade extra CPU/IO
229 # safely introduce a maximum cache size. This would trade extra CPU/IO
230 # time to save memory.
230 # time to save memory.
231 merge_caches = {}
231 merge_caches = {}
232
232
233 def revinfo(rev):
233 def revinfo(rev):
234 p1, p2 = parents(rev)
234 p1, p2 = parents(rev)
235 value = None
235 value = None
236 if flags(rev) & REVIDX_SIDEDATA:
236 if flags(rev) & REVIDX_SIDEDATA:
237 e = merge_caches.pop(rev, None)
237 e = merge_caches.pop(rev, None)
238 if e is not None:
238 if e is not None:
239 return e
239 return e
240 c = changelogrevision(rev)
240 c = changelogrevision(rev)
241 p1copies = c.p1copies
241 p1copies = c.p1copies
242 p2copies = c.p2copies
242 p2copies = c.p2copies
243 removed = c.filesremoved
243 removed = c.filesremoved
244 if p1 != node.nullrev and p2 != node.nullrev:
244 if p1 != node.nullrev and p2 != node.nullrev:
245 # XXX some case we over cache, IGNORE
245 # XXX some case we over cache, IGNORE
246 value = merge_caches[rev] = (
246 value = merge_caches[rev] = (
247 p1,
247 p1,
248 p2,
248 p2,
249 p1copies,
249 p1copies,
250 p2copies,
250 p2copies,
251 removed,
251 removed,
252 get_ismerged(rev),
252 get_ismerged(rev),
253 )
253 )
254 else:
254 else:
255 p1copies = {}
255 p1copies = {}
256 p2copies = {}
256 p2copies = {}
257 removed = []
257 removed = []
258
258
259 if value is None:
259 if value is None:
260 value = (p1, p2, p1copies, p2copies, removed, get_ismerged(rev))
260 value = (p1, p2, p1copies, p2copies, removed, get_ismerged(rev))
261 return value
261 return value
262
262
263 else:
263 else:
264
264
265 def revinfo(rev):
265 def revinfo(rev):
266 p1, p2 = parents(rev)
266 p1, p2 = parents(rev)
267 ctx = repo[rev]
267 ctx = repo[rev]
268 p1copies, p2copies = ctx._copies
268 p1copies, p2copies = ctx._copies
269 removed = ctx.filesremoved()
269 removed = ctx.filesremoved()
270 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
270 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
271
271
272 return revinfo
272 return revinfo
273
273
274
274
275 def _changesetforwardcopies(a, b, match):
275 def _changesetforwardcopies(a, b, match):
276 if a.rev() in (node.nullrev, b.rev()):
276 if a.rev() in (node.nullrev, b.rev()):
277 return {}
277 return {}
278
278
279 repo = a.repo().unfiltered()
279 repo = a.repo().unfiltered()
280 children = {}
280 children = {}
281 revinfo = _revinfo_getter(repo)
281 revinfo = _revinfo_getter(repo)
282
282
283 cl = repo.changelog
283 cl = repo.changelog
284 isancestor = cl.isancestorrev # XXX we should had chaching to this.
284 isancestor = cl.isancestorrev # XXX we should had chaching to this.
285 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
285 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
286 mrset = set(missingrevs)
286 mrset = set(missingrevs)
287 roots = set()
287 roots = set()
288 for r in missingrevs:
288 for r in missingrevs:
289 for p in cl.parentrevs(r):
289 for p in cl.parentrevs(r):
290 if p == node.nullrev:
290 if p == node.nullrev:
291 continue
291 continue
292 if p not in children:
292 if p not in children:
293 children[p] = [r]
293 children[p] = [r]
294 else:
294 else:
295 children[p].append(r)
295 children[p].append(r)
296 if p not in mrset:
296 if p not in mrset:
297 roots.add(p)
297 roots.add(p)
298 if not roots:
298 if not roots:
299 # no common revision to track copies from
299 # no common revision to track copies from
300 return {}
300 return {}
301 min_root = min(roots)
301 min_root = min(roots)
302
302
303 from_head = set(
303 from_head = set(
304 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
304 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
305 )
305 )
306
306
307 iterrevs = set(from_head)
307 iterrevs = set(from_head)
308 iterrevs &= mrset
308 iterrevs &= mrset
309 iterrevs.update(roots)
309 iterrevs.update(roots)
310 iterrevs.remove(b.rev())
310 iterrevs.remove(b.rev())
311 revs = sorted(iterrevs)
311 revs = sorted(iterrevs)
312 return _combine_changeset_copies(
312
313 revs, children, b.rev(), revinfo, match, isancestor
313 if repo.filecopiesmode == b'changeset-sidedata':
314 )
314 return _combine_changeset_copies(
315 revs, children, b.rev(), revinfo, match, isancestor
316 )
317 else:
318 return _combine_changeset_copies_extra(
319 revs, children, b.rev(), revinfo, match, isancestor
320 )
315
321
316
322
317 def _combine_changeset_copies(
323 def _combine_changeset_copies(
318 revs, children, targetrev, revinfo, match, isancestor
324 revs, children, targetrev, revinfo, match, isancestor
319 ):
325 ):
320 """combine the copies information for each item of iterrevs
326 """combine the copies information for each item of iterrevs
321
327
322 revs: sorted iterable of revision to visit
328 revs: sorted iterable of revision to visit
323 children: a {parent: [children]} mapping.
329 children: a {parent: [children]} mapping.
324 targetrev: the final copies destination revision (not in iterrevs)
330 targetrev: the final copies destination revision (not in iterrevs)
325 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
331 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
326 match: a matcher
332 match: a matcher
327
333
328 It returns the aggregated copies information for `targetrev`.
334 It returns the aggregated copies information for `targetrev`.
329 """
335 """
330 all_copies = {}
336 all_copies = {}
331 alwaysmatch = match.always()
337 alwaysmatch = match.always()
332 for r in revs:
338 for r in revs:
333 copies = all_copies.pop(r, None)
339 copies = all_copies.pop(r, None)
334 if copies is None:
340 if copies is None:
335 # this is a root
341 # this is a root
336 copies = {}
342 copies = {}
337 for i, c in enumerate(children[r]):
343 for i, c in enumerate(children[r]):
338 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
344 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
339 if r == p1:
345 if r == p1:
340 parent = 1
346 parent = 1
341 childcopies = p1copies
347 childcopies = p1copies
342 else:
348 else:
343 assert r == p2
349 assert r == p2
344 parent = 2
350 parent = 2
345 childcopies = p2copies
351 childcopies = p2copies
346 if not alwaysmatch:
352 if not alwaysmatch:
347 childcopies = {
353 childcopies = {
348 dst: src for dst, src in childcopies.items() if match(dst)
354 dst: src for dst, src in childcopies.items() if match(dst)
349 }
355 }
350 newcopies = copies
356 newcopies = copies
351 if childcopies:
357 if childcopies:
352 newcopies = copies.copy()
358 newcopies = copies.copy()
353 for dest, source in pycompat.iteritems(childcopies):
359 for dest, source in pycompat.iteritems(childcopies):
354 prev = copies.get(source)
360 prev = copies.get(source)
355 if prev is not None and prev[1] is not None:
361 if prev is not None and prev[1] is not None:
356 source = prev[1]
362 source = prev[1]
357 newcopies[dest] = (c, source)
363 newcopies[dest] = (c, source)
358 assert newcopies is not copies
364 assert newcopies is not copies
359 for f in removed:
365 for f in removed:
360 if f in newcopies:
366 if f in newcopies:
361 if newcopies is copies:
367 if newcopies is copies:
362 # copy on write to avoid affecting potential other
368 # copy on write to avoid affecting potential other
363 # branches. when there are no other branches, this
369 # branches. when there are no other branches, this
364 # could be avoided.
370 # could be avoided.
365 newcopies = copies.copy()
371 newcopies = copies.copy()
366 newcopies[f] = (c, None)
372 newcopies[f] = (c, None)
367 othercopies = all_copies.get(c)
373 othercopies = all_copies.get(c)
368 if othercopies is None:
374 if othercopies is None:
369 all_copies[c] = newcopies
375 all_copies[c] = newcopies
370 else:
376 else:
371 # we are the second parent to work on c, we need to merge our
377 # we are the second parent to work on c, we need to merge our
372 # work with the other.
378 # work with the other.
373 #
379 #
374 # In case of conflict, parent 1 take precedence over parent 2.
380 # In case of conflict, parent 1 take precedence over parent 2.
375 # This is an arbitrary choice made anew when implementing
381 # This is an arbitrary choice made anew when implementing
376 # changeset based copies. It was made without regards with
382 # changeset based copies. It was made without regards with
377 # potential filelog related behavior.
383 # potential filelog related behavior.
378 if parent == 1:
384 if parent == 1:
379 _merge_copies_dict(
385 _merge_copies_dict(
380 othercopies, newcopies, isancestor, ismerged
386 othercopies, newcopies, isancestor, ismerged
381 )
387 )
382 else:
388 else:
383 _merge_copies_dict(
389 _merge_copies_dict(
384 newcopies, othercopies, isancestor, ismerged
390 newcopies, othercopies, isancestor, ismerged
385 )
391 )
386 all_copies[c] = newcopies
392 all_copies[c] = newcopies
387
393
388 final_copies = {}
394 final_copies = {}
389 for dest, (tt, source) in all_copies[targetrev].items():
395 for dest, (tt, source) in all_copies[targetrev].items():
390 if source is not None:
396 if source is not None:
391 final_copies[dest] = source
397 final_copies[dest] = source
392 return final_copies
398 return final_copies
393
399
394
400
395 def _merge_copies_dict(minor, major, isancestor, ismerged):
401 def _merge_copies_dict(minor, major, isancestor, ismerged):
396 """merge two copies-mapping together, minor and major
402 """merge two copies-mapping together, minor and major
397
403
398 In case of conflict, value from "major" will be picked.
404 In case of conflict, value from "major" will be picked.
399
405
400 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
406 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
401 ancestors of `high_rev`,
407 ancestors of `high_rev`,
402
408
403 - `ismerged(path)`: callable return True if `path` have been merged in the
409 - `ismerged(path)`: callable return True if `path` have been merged in the
404 current revision,
410 current revision,
405 """
411 """
406 for dest, value in major.items():
412 for dest, value in major.items():
407 other = minor.get(dest)
413 other = minor.get(dest)
408 if other is None:
414 if other is None:
409 minor[dest] = value
415 minor[dest] = value
410 else:
416 else:
411 new_tt = value[0]
417 new_tt = value[0]
412 other_tt = other[0]
418 other_tt = other[0]
413 if value[1] == other[1]:
419 if value[1] == other[1]:
414 continue
420 continue
415 # content from "major" wins, unless it is older
421 # content from "major" wins, unless it is older
416 # than the branch point or there is a merge
422 # than the branch point or there is a merge
417 if (
423 if (
418 new_tt == other_tt
424 new_tt == other_tt
419 or not isancestor(new_tt, other_tt)
425 or not isancestor(new_tt, other_tt)
420 or ismerged(dest)
426 or ismerged(dest)
421 ):
427 ):
422 minor[dest] = value
428 minor[dest] = value
423
429
424
430
431 def _combine_changeset_copies_extra(
432 revs, children, targetrev, revinfo, match, isancestor
433 ):
434 """version of `_combine_changeset_copies` that works with the Google
435 specific "extra" based storage for copy information"""
436 all_copies = {}
437 alwaysmatch = match.always()
438 for r in revs:
439 copies = all_copies.pop(r, None)
440 if copies is None:
441 # this is a root
442 copies = {}
443 for i, c in enumerate(children[r]):
444 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
445 if r == p1:
446 parent = 1
447 childcopies = p1copies
448 else:
449 assert r == p2
450 parent = 2
451 childcopies = p2copies
452 if not alwaysmatch:
453 childcopies = {
454 dst: src for dst, src in childcopies.items() if match(dst)
455 }
456 newcopies = copies
457 if childcopies:
458 newcopies = copies.copy()
459 for dest, source in pycompat.iteritems(childcopies):
460 prev = copies.get(source)
461 if prev is not None and prev[1] is not None:
462 source = prev[1]
463 newcopies[dest] = (c, source)
464 assert newcopies is not copies
465 for f in removed:
466 if f in newcopies:
467 if newcopies is copies:
468 # copy on write to avoid affecting potential other
469 # branches. when there are no other branches, this
470 # could be avoided.
471 newcopies = copies.copy()
472 newcopies[f] = (c, None)
473 othercopies = all_copies.get(c)
474 if othercopies is None:
475 all_copies[c] = newcopies
476 else:
477 # we are the second parent to work on c, we need to merge our
478 # work with the other.
479 #
480 # In case of conflict, parent 1 take precedence over parent 2.
481 # This is an arbitrary choice made anew when implementing
482 # changeset based copies. It was made without regards with
483 # potential filelog related behavior.
484 if parent == 1:
485 _merge_copies_dict_extra(
486 othercopies, newcopies, isancestor, ismerged
487 )
488 else:
489 _merge_copies_dict_extra(
490 newcopies, othercopies, isancestor, ismerged
491 )
492 all_copies[c] = newcopies
493
494 final_copies = {}
495 for dest, (tt, source) in all_copies[targetrev].items():
496 if source is not None:
497 final_copies[dest] = source
498 return final_copies
499
500
501 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
502 """version of `_merge_copies_dict` that works with the Google
503 specific "extra" based storage for copy information"""
504 for dest, value in major.items():
505 other = minor.get(dest)
506 if other is None:
507 minor[dest] = value
508 else:
509 new_tt = value[0]
510 other_tt = other[0]
511 if value[1] == other[1]:
512 continue
513 # content from "major" wins, unless it is older
514 # than the branch point or there is a merge
515 if (
516 new_tt == other_tt
517 or not isancestor(new_tt, other_tt)
518 or ismerged(dest)
519 ):
520 minor[dest] = value
521
522
425 def _forwardcopies(a, b, base=None, match=None):
523 def _forwardcopies(a, b, base=None, match=None):
426 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
524 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
427
525
428 if base is None:
526 if base is None:
429 base = a
527 base = a
430 match = a.repo().narrowmatch(match)
528 match = a.repo().narrowmatch(match)
431 # check for working copy
529 # check for working copy
432 if b.rev() is None:
530 if b.rev() is None:
433 cm = _committedforwardcopies(a, b.p1(), base, match)
531 cm = _committedforwardcopies(a, b.p1(), base, match)
434 # combine copies from dirstate if necessary
532 # combine copies from dirstate if necessary
435 copies = _chain(cm, _dirstatecopies(b._repo, match))
533 copies = _chain(cm, _dirstatecopies(b._repo, match))
436 else:
534 else:
437 copies = _committedforwardcopies(a, b, base, match)
535 copies = _committedforwardcopies(a, b, base, match)
438 return copies
536 return copies
439
537
440
538
441 def _backwardrenames(a, b, match):
539 def _backwardrenames(a, b, match):
442 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
540 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
443 return {}
541 return {}
444
542
445 # Even though we're not taking copies into account, 1:n rename situations
543 # Even though we're not taking copies into account, 1:n rename situations
446 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
544 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
447 # arbitrarily pick one of the renames.
545 # arbitrarily pick one of the renames.
448 # We don't want to pass in "match" here, since that would filter
546 # We don't want to pass in "match" here, since that would filter
449 # the destination by it. Since we're reversing the copies, we want
547 # the destination by it. Since we're reversing the copies, we want
450 # to filter the source instead.
548 # to filter the source instead.
451 f = _forwardcopies(b, a)
549 f = _forwardcopies(b, a)
452 r = {}
550 r = {}
453 for k, v in sorted(pycompat.iteritems(f)):
551 for k, v in sorted(pycompat.iteritems(f)):
454 if match and not match(v):
552 if match and not match(v):
455 continue
553 continue
456 # remove copies
554 # remove copies
457 if v in a:
555 if v in a:
458 continue
556 continue
459 r[v] = k
557 r[v] = k
460 return r
558 return r
461
559
462
560
463 def pathcopies(x, y, match=None):
561 def pathcopies(x, y, match=None):
464 """find {dst@y: src@x} copy mapping for directed compare"""
562 """find {dst@y: src@x} copy mapping for directed compare"""
465 repo = x._repo
563 repo = x._repo
466 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
564 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
467 if debug:
565 if debug:
468 repo.ui.debug(
566 repo.ui.debug(
469 b'debug.copies: searching copies from %s to %s\n' % (x, y)
567 b'debug.copies: searching copies from %s to %s\n' % (x, y)
470 )
568 )
471 if x == y or not x or not y:
569 if x == y or not x or not y:
472 return {}
570 return {}
473 if y.rev() is None and x == y.p1():
571 if y.rev() is None and x == y.p1():
474 if debug:
572 if debug:
475 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
573 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
476 # short-circuit to avoid issues with merge states
574 # short-circuit to avoid issues with merge states
477 return _dirstatecopies(repo, match)
575 return _dirstatecopies(repo, match)
478 a = y.ancestor(x)
576 a = y.ancestor(x)
479 if a == x:
577 if a == x:
480 if debug:
578 if debug:
481 repo.ui.debug(b'debug.copies: search mode: forward\n')
579 repo.ui.debug(b'debug.copies: search mode: forward\n')
482 copies = _forwardcopies(x, y, match=match)
580 copies = _forwardcopies(x, y, match=match)
483 elif a == y:
581 elif a == y:
484 if debug:
582 if debug:
485 repo.ui.debug(b'debug.copies: search mode: backward\n')
583 repo.ui.debug(b'debug.copies: search mode: backward\n')
486 copies = _backwardrenames(x, y, match=match)
584 copies = _backwardrenames(x, y, match=match)
487 else:
585 else:
488 if debug:
586 if debug:
489 repo.ui.debug(b'debug.copies: search mode: combined\n')
587 repo.ui.debug(b'debug.copies: search mode: combined\n')
490 base = None
588 base = None
491 if a.rev() != node.nullrev:
589 if a.rev() != node.nullrev:
492 base = x
590 base = x
493 copies = _chain(
591 copies = _chain(
494 _backwardrenames(x, a, match=match),
592 _backwardrenames(x, a, match=match),
495 _forwardcopies(a, y, base, match=match),
593 _forwardcopies(a, y, base, match=match),
496 )
594 )
497 _filter(x, y, copies)
595 _filter(x, y, copies)
498 return copies
596 return copies
499
597
500
598
501 def mergecopies(repo, c1, c2, base):
599 def mergecopies(repo, c1, c2, base):
502 """
600 """
503 Finds moves and copies between context c1 and c2 that are relevant for
601 Finds moves and copies between context c1 and c2 that are relevant for
504 merging. 'base' will be used as the merge base.
602 merging. 'base' will be used as the merge base.
505
603
506 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
604 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
507 files that were moved/ copied in one merge parent and modified in another.
605 files that were moved/ copied in one merge parent and modified in another.
508 For example:
606 For example:
509
607
510 o ---> 4 another commit
608 o ---> 4 another commit
511 |
609 |
512 | o ---> 3 commit that modifies a.txt
610 | o ---> 3 commit that modifies a.txt
513 | /
611 | /
514 o / ---> 2 commit that moves a.txt to b.txt
612 o / ---> 2 commit that moves a.txt to b.txt
515 |/
613 |/
516 o ---> 1 merge base
614 o ---> 1 merge base
517
615
518 If we try to rebase revision 3 on revision 4, since there is no a.txt in
616 If we try to rebase revision 3 on revision 4, since there is no a.txt in
519 revision 4, and if user have copytrace disabled, we prints the following
617 revision 4, and if user have copytrace disabled, we prints the following
520 message:
618 message:
521
619
522 ```other changed <file> which local deleted```
620 ```other changed <file> which local deleted```
523
621
524 Returns a tuple where:
622 Returns a tuple where:
525
623
526 "branch_copies" an instance of branch_copies.
624 "branch_copies" an instance of branch_copies.
527
625
528 "diverge" is a mapping of source name -> list of destination names
626 "diverge" is a mapping of source name -> list of destination names
529 for divergent renames.
627 for divergent renames.
530
628
531 This function calls different copytracing algorithms based on config.
629 This function calls different copytracing algorithms based on config.
532 """
630 """
533 # avoid silly behavior for update from empty dir
631 # avoid silly behavior for update from empty dir
534 if not c1 or not c2 or c1 == c2:
632 if not c1 or not c2 or c1 == c2:
535 return branch_copies(), branch_copies(), {}
633 return branch_copies(), branch_copies(), {}
536
634
537 narrowmatch = c1.repo().narrowmatch()
635 narrowmatch = c1.repo().narrowmatch()
538
636
539 # avoid silly behavior for parent -> working dir
637 # avoid silly behavior for parent -> working dir
540 if c2.node() is None and c1.node() == repo.dirstate.p1():
638 if c2.node() is None and c1.node() == repo.dirstate.p1():
541 return (
639 return (
542 branch_copies(_dirstatecopies(repo, narrowmatch)),
640 branch_copies(_dirstatecopies(repo, narrowmatch)),
543 branch_copies(),
641 branch_copies(),
544 {},
642 {},
545 )
643 )
546
644
547 copytracing = repo.ui.config(b'experimental', b'copytrace')
645 copytracing = repo.ui.config(b'experimental', b'copytrace')
548 if stringutil.parsebool(copytracing) is False:
646 if stringutil.parsebool(copytracing) is False:
549 # stringutil.parsebool() returns None when it is unable to parse the
647 # stringutil.parsebool() returns None when it is unable to parse the
550 # value, so we should rely on making sure copytracing is on such cases
648 # value, so we should rely on making sure copytracing is on such cases
551 return branch_copies(), branch_copies(), {}
649 return branch_copies(), branch_copies(), {}
552
650
553 if usechangesetcentricalgo(repo):
651 if usechangesetcentricalgo(repo):
554 # The heuristics don't make sense when we need changeset-centric algos
652 # The heuristics don't make sense when we need changeset-centric algos
555 return _fullcopytracing(repo, c1, c2, base)
653 return _fullcopytracing(repo, c1, c2, base)
556
654
557 # Copy trace disabling is explicitly below the node == p1 logic above
655 # Copy trace disabling is explicitly below the node == p1 logic above
558 # because the logic above is required for a simple copy to be kept across a
656 # because the logic above is required for a simple copy to be kept across a
559 # rebase.
657 # rebase.
560 if copytracing == b'heuristics':
658 if copytracing == b'heuristics':
561 # Do full copytracing if only non-public revisions are involved as
659 # Do full copytracing if only non-public revisions are involved as
562 # that will be fast enough and will also cover the copies which could
660 # that will be fast enough and will also cover the copies which could
563 # be missed by heuristics
661 # be missed by heuristics
564 if _isfullcopytraceable(repo, c1, base):
662 if _isfullcopytraceable(repo, c1, base):
565 return _fullcopytracing(repo, c1, c2, base)
663 return _fullcopytracing(repo, c1, c2, base)
566 return _heuristicscopytracing(repo, c1, c2, base)
664 return _heuristicscopytracing(repo, c1, c2, base)
567 else:
665 else:
568 return _fullcopytracing(repo, c1, c2, base)
666 return _fullcopytracing(repo, c1, c2, base)
569
667
570
668
571 def _isfullcopytraceable(repo, c1, base):
669 def _isfullcopytraceable(repo, c1, base):
572 """ Checks that if base, source and destination are all no-public branches,
670 """ Checks that if base, source and destination are all no-public branches,
573 if yes let's use the full copytrace algorithm for increased capabilities
671 if yes let's use the full copytrace algorithm for increased capabilities
574 since it will be fast enough.
672 since it will be fast enough.
575
673
576 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
674 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
577 number of changesets from c1 to base such that if number of changesets are
675 number of changesets from c1 to base such that if number of changesets are
578 more than the limit, full copytracing algorithm won't be used.
676 more than the limit, full copytracing algorithm won't be used.
579 """
677 """
580 if c1.rev() is None:
678 if c1.rev() is None:
581 c1 = c1.p1()
679 c1 = c1.p1()
582 if c1.mutable() and base.mutable():
680 if c1.mutable() and base.mutable():
583 sourcecommitlimit = repo.ui.configint(
681 sourcecommitlimit = repo.ui.configint(
584 b'experimental', b'copytrace.sourcecommitlimit'
682 b'experimental', b'copytrace.sourcecommitlimit'
585 )
683 )
586 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
684 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
587 return commits < sourcecommitlimit
685 return commits < sourcecommitlimit
588 return False
686 return False
589
687
590
688
591 def _checksinglesidecopies(
689 def _checksinglesidecopies(
592 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
690 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
593 ):
691 ):
594 if src not in m2:
692 if src not in m2:
595 # deleted on side 2
693 # deleted on side 2
596 if src not in m1:
694 if src not in m1:
597 # renamed on side 1, deleted on side 2
695 # renamed on side 1, deleted on side 2
598 renamedelete[src] = dsts1
696 renamedelete[src] = dsts1
599 elif src not in mb:
697 elif src not in mb:
600 # Work around the "short-circuit to avoid issues with merge states"
698 # Work around the "short-circuit to avoid issues with merge states"
601 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
699 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
602 # destination doesn't exist in y.
700 # destination doesn't exist in y.
603 pass
701 pass
604 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
702 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
605 return
703 return
606 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
704 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
607 # modified on side 2
705 # modified on side 2
608 for dst in dsts1:
706 for dst in dsts1:
609 copy[dst] = src
707 copy[dst] = src
610
708
611
709
612 class branch_copies(object):
710 class branch_copies(object):
613 """Information about copies made on one side of a merge/graft.
711 """Information about copies made on one side of a merge/graft.
614
712
615 "copy" is a mapping from destination name -> source name,
713 "copy" is a mapping from destination name -> source name,
616 where source is in c1 and destination is in c2 or vice-versa.
714 where source is in c1 and destination is in c2 or vice-versa.
617
715
618 "movewithdir" is a mapping from source name -> destination name,
716 "movewithdir" is a mapping from source name -> destination name,
619 where the file at source present in one context but not the other
717 where the file at source present in one context but not the other
620 needs to be moved to destination by the merge process, because the
718 needs to be moved to destination by the merge process, because the
621 other context moved the directory it is in.
719 other context moved the directory it is in.
622
720
623 "renamedelete" is a mapping of source name -> list of destination
721 "renamedelete" is a mapping of source name -> list of destination
624 names for files deleted in c1 that were renamed in c2 or vice-versa.
722 names for files deleted in c1 that were renamed in c2 or vice-versa.
625
723
626 "dirmove" is a mapping of detected source dir -> destination dir renames.
724 "dirmove" is a mapping of detected source dir -> destination dir renames.
627 This is needed for handling changes to new files previously grafted into
725 This is needed for handling changes to new files previously grafted into
628 renamed directories.
726 renamed directories.
629 """
727 """
630
728
631 def __init__(
729 def __init__(
632 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
730 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
633 ):
731 ):
634 self.copy = {} if copy is None else copy
732 self.copy = {} if copy is None else copy
635 self.renamedelete = {} if renamedelete is None else renamedelete
733 self.renamedelete = {} if renamedelete is None else renamedelete
636 self.dirmove = {} if dirmove is None else dirmove
734 self.dirmove = {} if dirmove is None else dirmove
637 self.movewithdir = {} if movewithdir is None else movewithdir
735 self.movewithdir = {} if movewithdir is None else movewithdir
638
736
639 def __repr__(self):
737 def __repr__(self):
640 return (
738 return (
641 '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>'
739 '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>'
642 % (self.copy, self.renamedelete, self.dirmove, self.movewithdir,)
740 % (self.copy, self.renamedelete, self.dirmove, self.movewithdir,)
643 )
741 )
644
742
645
743
646 def _fullcopytracing(repo, c1, c2, base):
744 def _fullcopytracing(repo, c1, c2, base):
647 """ The full copytracing algorithm which finds all the new files that were
745 """ The full copytracing algorithm which finds all the new files that were
648 added from merge base up to the top commit and for each file it checks if
746 added from merge base up to the top commit and for each file it checks if
649 this file was copied from another file.
747 this file was copied from another file.
650
748
651 This is pretty slow when a lot of changesets are involved but will track all
749 This is pretty slow when a lot of changesets are involved but will track all
652 the copies.
750 the copies.
653 """
751 """
654 m1 = c1.manifest()
752 m1 = c1.manifest()
655 m2 = c2.manifest()
753 m2 = c2.manifest()
656 mb = base.manifest()
754 mb = base.manifest()
657
755
658 copies1 = pathcopies(base, c1)
756 copies1 = pathcopies(base, c1)
659 copies2 = pathcopies(base, c2)
757 copies2 = pathcopies(base, c2)
660
758
661 if not (copies1 or copies2):
759 if not (copies1 or copies2):
662 return branch_copies(), branch_copies(), {}
760 return branch_copies(), branch_copies(), {}
663
761
664 inversecopies1 = {}
762 inversecopies1 = {}
665 inversecopies2 = {}
763 inversecopies2 = {}
666 for dst, src in copies1.items():
764 for dst, src in copies1.items():
667 inversecopies1.setdefault(src, []).append(dst)
765 inversecopies1.setdefault(src, []).append(dst)
668 for dst, src in copies2.items():
766 for dst, src in copies2.items():
669 inversecopies2.setdefault(src, []).append(dst)
767 inversecopies2.setdefault(src, []).append(dst)
670
768
671 copy1 = {}
769 copy1 = {}
672 copy2 = {}
770 copy2 = {}
673 diverge = {}
771 diverge = {}
674 renamedelete1 = {}
772 renamedelete1 = {}
675 renamedelete2 = {}
773 renamedelete2 = {}
676 allsources = set(inversecopies1) | set(inversecopies2)
774 allsources = set(inversecopies1) | set(inversecopies2)
677 for src in allsources:
775 for src in allsources:
678 dsts1 = inversecopies1.get(src)
776 dsts1 = inversecopies1.get(src)
679 dsts2 = inversecopies2.get(src)
777 dsts2 = inversecopies2.get(src)
680 if dsts1 and dsts2:
778 if dsts1 and dsts2:
681 # copied/renamed on both sides
779 # copied/renamed on both sides
682 if src not in m1 and src not in m2:
780 if src not in m1 and src not in m2:
683 # renamed on both sides
781 # renamed on both sides
684 dsts1 = set(dsts1)
782 dsts1 = set(dsts1)
685 dsts2 = set(dsts2)
783 dsts2 = set(dsts2)
686 # If there's some overlap in the rename destinations, we
784 # If there's some overlap in the rename destinations, we
687 # consider it not divergent. For example, if side 1 copies 'a'
785 # consider it not divergent. For example, if side 1 copies 'a'
688 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
786 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
689 # and 'd' and deletes 'a'.
787 # and 'd' and deletes 'a'.
690 if dsts1 & dsts2:
788 if dsts1 & dsts2:
691 for dst in dsts1 & dsts2:
789 for dst in dsts1 & dsts2:
692 copy1[dst] = src
790 copy1[dst] = src
693 copy2[dst] = src
791 copy2[dst] = src
694 else:
792 else:
695 diverge[src] = sorted(dsts1 | dsts2)
793 diverge[src] = sorted(dsts1 | dsts2)
696 elif src in m1 and src in m2:
794 elif src in m1 and src in m2:
697 # copied on both sides
795 # copied on both sides
698 dsts1 = set(dsts1)
796 dsts1 = set(dsts1)
699 dsts2 = set(dsts2)
797 dsts2 = set(dsts2)
700 for dst in dsts1 & dsts2:
798 for dst in dsts1 & dsts2:
701 copy1[dst] = src
799 copy1[dst] = src
702 copy2[dst] = src
800 copy2[dst] = src
703 # TODO: Handle cases where it was renamed on one side and copied
801 # TODO: Handle cases where it was renamed on one side and copied
704 # on the other side
802 # on the other side
705 elif dsts1:
803 elif dsts1:
706 # copied/renamed only on side 1
804 # copied/renamed only on side 1
707 _checksinglesidecopies(
805 _checksinglesidecopies(
708 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
806 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
709 )
807 )
710 elif dsts2:
808 elif dsts2:
711 # copied/renamed only on side 2
809 # copied/renamed only on side 2
712 _checksinglesidecopies(
810 _checksinglesidecopies(
713 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
811 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
714 )
812 )
715
813
716 # find interesting file sets from manifests
814 # find interesting file sets from manifests
717 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
815 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
718 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
816 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
719 u1 = sorted(addedinm1 - addedinm2)
817 u1 = sorted(addedinm1 - addedinm2)
720 u2 = sorted(addedinm2 - addedinm1)
818 u2 = sorted(addedinm2 - addedinm1)
721
819
722 header = b" unmatched files in %s"
820 header = b" unmatched files in %s"
723 if u1:
821 if u1:
724 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
822 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
725 if u2:
823 if u2:
726 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
824 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
727
825
728 if repo.ui.debugflag:
826 if repo.ui.debugflag:
729 renamedeleteset = set()
827 renamedeleteset = set()
730 divergeset = set()
828 divergeset = set()
731 for dsts in diverge.values():
829 for dsts in diverge.values():
732 divergeset.update(dsts)
830 divergeset.update(dsts)
733 for dsts in renamedelete1.values():
831 for dsts in renamedelete1.values():
734 renamedeleteset.update(dsts)
832 renamedeleteset.update(dsts)
735 for dsts in renamedelete2.values():
833 for dsts in renamedelete2.values():
736 renamedeleteset.update(dsts)
834 renamedeleteset.update(dsts)
737
835
738 repo.ui.debug(
836 repo.ui.debug(
739 b" all copies found (* = to merge, ! = divergent, "
837 b" all copies found (* = to merge, ! = divergent, "
740 b"% = renamed and deleted):\n"
838 b"% = renamed and deleted):\n"
741 )
839 )
742 for side, copies in ((b"local", copies1), (b"remote", copies2)):
840 for side, copies in ((b"local", copies1), (b"remote", copies2)):
743 if not copies:
841 if not copies:
744 continue
842 continue
745 repo.ui.debug(b" on %s side:\n" % side)
843 repo.ui.debug(b" on %s side:\n" % side)
746 for f in sorted(copies):
844 for f in sorted(copies):
747 note = b""
845 note = b""
748 if f in copy1 or f in copy2:
846 if f in copy1 or f in copy2:
749 note += b"*"
847 note += b"*"
750 if f in divergeset:
848 if f in divergeset:
751 note += b"!"
849 note += b"!"
752 if f in renamedeleteset:
850 if f in renamedeleteset:
753 note += b"%"
851 note += b"%"
754 repo.ui.debug(
852 repo.ui.debug(
755 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
853 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
756 )
854 )
757 del renamedeleteset
855 del renamedeleteset
758 del divergeset
856 del divergeset
759
857
760 repo.ui.debug(b" checking for directory renames\n")
858 repo.ui.debug(b" checking for directory renames\n")
761
859
762 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
860 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
763 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)
861 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)
764
862
765 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
863 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
766 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
864 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
767
865
768 return branch_copies1, branch_copies2, diverge
866 return branch_copies1, branch_copies2, diverge
769
867
770
868
771 def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
869 def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
772 """Finds moved directories and files that should move with them.
870 """Finds moved directories and files that should move with them.
773
871
774 ctx: the context for one of the sides
872 ctx: the context for one of the sides
775 copy: files copied on the same side (as ctx)
873 copy: files copied on the same side (as ctx)
776 fullcopy: files copied on the same side (as ctx), including those that
874 fullcopy: files copied on the same side (as ctx), including those that
777 merge.manifestmerge() won't care about
875 merge.manifestmerge() won't care about
778 addedfiles: added files on the other side (compared to ctx)
876 addedfiles: added files on the other side (compared to ctx)
779 """
877 """
780 # generate a directory move map
878 # generate a directory move map
781 d = ctx.dirs()
879 d = ctx.dirs()
782 invalid = set()
880 invalid = set()
783 dirmove = {}
881 dirmove = {}
784
882
785 # examine each file copy for a potential directory move, which is
883 # examine each file copy for a potential directory move, which is
786 # when all the files in a directory are moved to a new directory
884 # when all the files in a directory are moved to a new directory
787 for dst, src in pycompat.iteritems(fullcopy):
885 for dst, src in pycompat.iteritems(fullcopy):
788 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
886 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
789 if dsrc in invalid:
887 if dsrc in invalid:
790 # already seen to be uninteresting
888 # already seen to be uninteresting
791 continue
889 continue
792 elif dsrc in d and ddst in d:
890 elif dsrc in d and ddst in d:
793 # directory wasn't entirely moved locally
891 # directory wasn't entirely moved locally
794 invalid.add(dsrc)
892 invalid.add(dsrc)
795 elif dsrc in dirmove and dirmove[dsrc] != ddst:
893 elif dsrc in dirmove and dirmove[dsrc] != ddst:
796 # files from the same directory moved to two different places
894 # files from the same directory moved to two different places
797 invalid.add(dsrc)
895 invalid.add(dsrc)
798 else:
896 else:
799 # looks good so far
897 # looks good so far
800 dirmove[dsrc] = ddst
898 dirmove[dsrc] = ddst
801
899
802 for i in invalid:
900 for i in invalid:
803 if i in dirmove:
901 if i in dirmove:
804 del dirmove[i]
902 del dirmove[i]
805 del d, invalid
903 del d, invalid
806
904
807 if not dirmove:
905 if not dirmove:
808 return {}, {}
906 return {}, {}
809
907
810 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
908 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
811
909
812 for d in dirmove:
910 for d in dirmove:
813 repo.ui.debug(
911 repo.ui.debug(
814 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
912 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
815 )
913 )
816
914
817 movewithdir = {}
915 movewithdir = {}
818 # check unaccounted nonoverlapping files against directory moves
916 # check unaccounted nonoverlapping files against directory moves
819 for f in addedfiles:
917 for f in addedfiles:
820 if f not in fullcopy:
918 if f not in fullcopy:
821 for d in dirmove:
919 for d in dirmove:
822 if f.startswith(d):
920 if f.startswith(d):
823 # new file added in a directory that was moved, move it
921 # new file added in a directory that was moved, move it
824 df = dirmove[d] + f[len(d) :]
922 df = dirmove[d] + f[len(d) :]
825 if df not in copy:
923 if df not in copy:
826 movewithdir[f] = df
924 movewithdir[f] = df
827 repo.ui.debug(
925 repo.ui.debug(
828 b" pending file src: '%s' -> dst: '%s'\n"
926 b" pending file src: '%s' -> dst: '%s'\n"
829 % (f, df)
927 % (f, df)
830 )
928 )
831 break
929 break
832
930
833 return dirmove, movewithdir
931 return dirmove, movewithdir
834
932
835
933
836 def _heuristicscopytracing(repo, c1, c2, base):
934 def _heuristicscopytracing(repo, c1, c2, base):
837 """ Fast copytracing using filename heuristics
935 """ Fast copytracing using filename heuristics
838
936
839 Assumes that moves or renames are of following two types:
937 Assumes that moves or renames are of following two types:
840
938
841 1) Inside a directory only (same directory name but different filenames)
939 1) Inside a directory only (same directory name but different filenames)
842 2) Move from one directory to another
940 2) Move from one directory to another
843 (same filenames but different directory names)
941 (same filenames but different directory names)
844
942
845 Works only when there are no merge commits in the "source branch".
943 Works only when there are no merge commits in the "source branch".
846 Source branch is commits from base up to c2 not including base.
944 Source branch is commits from base up to c2 not including base.
847
945
848 If merge is involved it fallbacks to _fullcopytracing().
946 If merge is involved it fallbacks to _fullcopytracing().
849
947
850 Can be used by setting the following config:
948 Can be used by setting the following config:
851
949
852 [experimental]
950 [experimental]
853 copytrace = heuristics
951 copytrace = heuristics
854
952
855 In some cases the copy/move candidates found by heuristics can be very large
953 In some cases the copy/move candidates found by heuristics can be very large
856 in number and that will make the algorithm slow. The number of possible
954 in number and that will make the algorithm slow. The number of possible
857 candidates to check can be limited by using the config
955 candidates to check can be limited by using the config
858 `experimental.copytrace.movecandidateslimit` which defaults to 100.
956 `experimental.copytrace.movecandidateslimit` which defaults to 100.
859 """
957 """
860
958
861 if c1.rev() is None:
959 if c1.rev() is None:
862 c1 = c1.p1()
960 c1 = c1.p1()
863 if c2.rev() is None:
961 if c2.rev() is None:
864 c2 = c2.p1()
962 c2 = c2.p1()
865
963
866 changedfiles = set()
964 changedfiles = set()
867 m1 = c1.manifest()
965 m1 = c1.manifest()
868 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
966 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
869 # If base is not in c2 branch, we switch to fullcopytracing
967 # If base is not in c2 branch, we switch to fullcopytracing
870 repo.ui.debug(
968 repo.ui.debug(
871 b"switching to full copytracing as base is not "
969 b"switching to full copytracing as base is not "
872 b"an ancestor of c2\n"
970 b"an ancestor of c2\n"
873 )
971 )
874 return _fullcopytracing(repo, c1, c2, base)
972 return _fullcopytracing(repo, c1, c2, base)
875
973
876 ctx = c2
974 ctx = c2
877 while ctx != base:
975 while ctx != base:
878 if len(ctx.parents()) == 2:
976 if len(ctx.parents()) == 2:
879 # To keep things simple let's not handle merges
977 # To keep things simple let's not handle merges
880 repo.ui.debug(b"switching to full copytracing because of merges\n")
978 repo.ui.debug(b"switching to full copytracing because of merges\n")
881 return _fullcopytracing(repo, c1, c2, base)
979 return _fullcopytracing(repo, c1, c2, base)
882 changedfiles.update(ctx.files())
980 changedfiles.update(ctx.files())
883 ctx = ctx.p1()
981 ctx = ctx.p1()
884
982
885 copies2 = {}
983 copies2 = {}
886 cp = _forwardcopies(base, c2)
984 cp = _forwardcopies(base, c2)
887 for dst, src in pycompat.iteritems(cp):
985 for dst, src in pycompat.iteritems(cp):
888 if src in m1:
986 if src in m1:
889 copies2[dst] = src
987 copies2[dst] = src
890
988
891 # file is missing if it isn't present in the destination, but is present in
989 # file is missing if it isn't present in the destination, but is present in
892 # the base and present in the source.
990 # the base and present in the source.
893 # Presence in the base is important to exclude added files, presence in the
991 # Presence in the base is important to exclude added files, presence in the
894 # source is important to exclude removed files.
992 # source is important to exclude removed files.
895 filt = lambda f: f not in m1 and f in base and f in c2
993 filt = lambda f: f not in m1 and f in base and f in c2
896 missingfiles = [f for f in changedfiles if filt(f)]
994 missingfiles = [f for f in changedfiles if filt(f)]
897
995
898 copies1 = {}
996 copies1 = {}
899 if missingfiles:
997 if missingfiles:
900 basenametofilename = collections.defaultdict(list)
998 basenametofilename = collections.defaultdict(list)
901 dirnametofilename = collections.defaultdict(list)
999 dirnametofilename = collections.defaultdict(list)
902
1000
903 for f in m1.filesnotin(base.manifest()):
1001 for f in m1.filesnotin(base.manifest()):
904 basename = os.path.basename(f)
1002 basename = os.path.basename(f)
905 dirname = os.path.dirname(f)
1003 dirname = os.path.dirname(f)
906 basenametofilename[basename].append(f)
1004 basenametofilename[basename].append(f)
907 dirnametofilename[dirname].append(f)
1005 dirnametofilename[dirname].append(f)
908
1006
909 for f in missingfiles:
1007 for f in missingfiles:
910 basename = os.path.basename(f)
1008 basename = os.path.basename(f)
911 dirname = os.path.dirname(f)
1009 dirname = os.path.dirname(f)
912 samebasename = basenametofilename[basename]
1010 samebasename = basenametofilename[basename]
913 samedirname = dirnametofilename[dirname]
1011 samedirname = dirnametofilename[dirname]
914 movecandidates = samebasename + samedirname
1012 movecandidates = samebasename + samedirname
915 # f is guaranteed to be present in c2, that's why
1013 # f is guaranteed to be present in c2, that's why
916 # c2.filectx(f) won't fail
1014 # c2.filectx(f) won't fail
917 f2 = c2.filectx(f)
1015 f2 = c2.filectx(f)
918 # we can have a lot of candidates which can slow down the heuristics
1016 # we can have a lot of candidates which can slow down the heuristics
919 # config value to limit the number of candidates moves to check
1017 # config value to limit the number of candidates moves to check
920 maxcandidates = repo.ui.configint(
1018 maxcandidates = repo.ui.configint(
921 b'experimental', b'copytrace.movecandidateslimit'
1019 b'experimental', b'copytrace.movecandidateslimit'
922 )
1020 )
923
1021
924 if len(movecandidates) > maxcandidates:
1022 if len(movecandidates) > maxcandidates:
925 repo.ui.status(
1023 repo.ui.status(
926 _(
1024 _(
927 b"skipping copytracing for '%s', more "
1025 b"skipping copytracing for '%s', more "
928 b"candidates than the limit: %d\n"
1026 b"candidates than the limit: %d\n"
929 )
1027 )
930 % (f, len(movecandidates))
1028 % (f, len(movecandidates))
931 )
1029 )
932 continue
1030 continue
933
1031
934 for candidate in movecandidates:
1032 for candidate in movecandidates:
935 f1 = c1.filectx(candidate)
1033 f1 = c1.filectx(candidate)
936 if _related(f1, f2):
1034 if _related(f1, f2):
937 # if there are a few related copies then we'll merge
1035 # if there are a few related copies then we'll merge
938 # changes into all of them. This matches the behaviour
1036 # changes into all of them. This matches the behaviour
939 # of upstream copytracing
1037 # of upstream copytracing
940 copies1[candidate] = f
1038 copies1[candidate] = f
941
1039
942 return branch_copies(copies1), branch_copies(copies2), {}
1040 return branch_copies(copies1), branch_copies(copies2), {}
943
1041
944
1042
945 def _related(f1, f2):
1043 def _related(f1, f2):
946 """return True if f1 and f2 filectx have a common ancestor
1044 """return True if f1 and f2 filectx have a common ancestor
947
1045
948 Walk back to common ancestor to see if the two files originate
1046 Walk back to common ancestor to see if the two files originate
949 from the same file. Since workingfilectx's rev() is None it messes
1047 from the same file. Since workingfilectx's rev() is None it messes
950 up the integer comparison logic, hence the pre-step check for
1048 up the integer comparison logic, hence the pre-step check for
951 None (f1 and f2 can only be workingfilectx's initially).
1049 None (f1 and f2 can only be workingfilectx's initially).
952 """
1050 """
953
1051
954 if f1 == f2:
1052 if f1 == f2:
955 return True # a match
1053 return True # a match
956
1054
957 g1, g2 = f1.ancestors(), f2.ancestors()
1055 g1, g2 = f1.ancestors(), f2.ancestors()
958 try:
1056 try:
959 f1r, f2r = f1.linkrev(), f2.linkrev()
1057 f1r, f2r = f1.linkrev(), f2.linkrev()
960
1058
961 if f1r is None:
1059 if f1r is None:
962 f1 = next(g1)
1060 f1 = next(g1)
963 if f2r is None:
1061 if f2r is None:
964 f2 = next(g2)
1062 f2 = next(g2)
965
1063
966 while True:
1064 while True:
967 f1r, f2r = f1.linkrev(), f2.linkrev()
1065 f1r, f2r = f1.linkrev(), f2.linkrev()
968 if f1r > f2r:
1066 if f1r > f2r:
969 f1 = next(g1)
1067 f1 = next(g1)
970 elif f2r > f1r:
1068 elif f2r > f1r:
971 f2 = next(g2)
1069 f2 = next(g2)
972 else: # f1 and f2 point to files in the same linkrev
1070 else: # f1 and f2 point to files in the same linkrev
973 return f1 == f2 # true if they point to the same file
1071 return f1 == f2 # true if they point to the same file
974 except StopIteration:
1072 except StopIteration:
975 return False
1073 return False
976
1074
977
1075
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    # copies between base and ctx only; wctx-side copies are ignored
    copies_to_apply = pathcopies(base, ctx)
    # drop copies that are invalid when chained onto the working copy
    _filter(wctx.p1(), wctx, copies_to_apply)
    for dest, source in pycompat.iteritems(copies_to_apply):
        wctx[dest].markcopied(source)
General Comments 0
You need to be logged in to leave comments. Login now