# Mercurial changeset r46725:2f357d05 (default branch), by Kyle Lippincott:
# "copies: make calculating lazy for dir move detection's 'addedfiles'"
@@ -1,1178 +1,1194
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import os
11 import os
12
12
13 from .i18n import _
13 from .i18n import _
14
14
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 policy,
20 policy,
21 pycompat,
21 pycompat,
22 util,
22 util,
23 )
23 )
24
24
25
25
26 from .utils import stringutil
26 from .utils import stringutil
27
27
28 from .revlogutils import (
28 from .revlogutils import (
29 flagutil,
29 flagutil,
30 sidedata as sidedatamod,
30 sidedata as sidedatamod,
31 )
31 )
32
32
33 rustmod = policy.importrust("copy_tracing")
33 rustmod = policy.importrust("copy_tracing")
34
34
35
35
36 def _filter(src, dst, t):
36 def _filter(src, dst, t):
37 """filters out invalid copies after chaining"""
37 """filters out invalid copies after chaining"""
38
38
39 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
39 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
40 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
40 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
41 # in the following table (not including trivial cases). For example, case 2
41 # in the following table (not including trivial cases). For example, case 2
42 # is where a file existed in 'src' and remained under that name in 'mid' and
42 # is where a file existed in 'src' and remained under that name in 'mid' and
43 # then was renamed between 'mid' and 'dst'.
43 # then was renamed between 'mid' and 'dst'.
44 #
44 #
45 # case src mid dst result
45 # case src mid dst result
46 # 1 x y - -
46 # 1 x y - -
47 # 2 x y y x->y
47 # 2 x y y x->y
48 # 3 x y x -
48 # 3 x y x -
49 # 4 x y z x->z
49 # 4 x y z x->z
50 # 5 - x y -
50 # 5 - x y -
51 # 6 x x y x->y
51 # 6 x x y x->y
52 #
52 #
53 # _chain() takes care of chaining the copies in 'a' and 'b', but it
53 # _chain() takes care of chaining the copies in 'a' and 'b', but it
54 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
54 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
55 # between 5 and 6, so it includes all cases in its result.
55 # between 5 and 6, so it includes all cases in its result.
56 # Cases 1, 3, and 5 are then removed by _filter().
56 # Cases 1, 3, and 5 are then removed by _filter().
57
57
58 for k, v in list(t.items()):
58 for k, v in list(t.items()):
59 # remove copies from files that didn't exist
59 # remove copies from files that didn't exist
60 if v not in src:
60 if v not in src:
61 del t[k]
61 del t[k]
62 # remove criss-crossed copies
62 # remove criss-crossed copies
63 elif k in src and v in dst:
63 elif k in src and v in dst:
64 del t[k]
64 del t[k]
65 # remove copies to files that were then removed
65 # remove copies to files that were then removed
66 elif k not in dst:
66 elif k not in dst:
67 del t[k]
67 del t[k]
68
68
69
69
def _chain(prefix, suffix):
    """Chain two copy mappings: 'prefix' applied first, then 'suffix'."""
    chained = dict(prefix)
    for copy_dst, copy_src in pycompat.iteritems(suffix):
        # if the suffix's source was itself a copy target in 'prefix',
        # collapse the chain down to the original prefix source
        chained[copy_dst] = prefix.get(copy_src, copy_src)
    return chained
76
76
77
77
78 def _tracefile(fctx, am, basemf):
78 def _tracefile(fctx, am, basemf):
79 """return file context that is the ancestor of fctx present in ancestor
79 """return file context that is the ancestor of fctx present in ancestor
80 manifest am
80 manifest am
81
81
82 Note: we used to try and stop after a given limit, however checking if that
82 Note: we used to try and stop after a given limit, however checking if that
83 limit is reached turned out to be very expensive. we are better off
83 limit is reached turned out to be very expensive. we are better off
84 disabling that feature."""
84 disabling that feature."""
85
85
86 for f in fctx.ancestors():
86 for f in fctx.ancestors():
87 path = f.path()
87 path = f.path()
88 if am.get(path, None) == f.filenode():
88 if am.get(path, None) == f.filenode():
89 return path
89 return path
90 if basemf and basemf.get(path, None) == f.filenode():
90 if basemf and basemf.get(path, None) == f.filenode():
91 return path
91 return path
92
92
93
93
94 def _dirstatecopies(repo, match=None):
94 def _dirstatecopies(repo, match=None):
95 ds = repo.dirstate
95 ds = repo.dirstate
96 c = ds.copies().copy()
96 c = ds.copies().copy()
97 for k in list(c):
97 for k in list(c):
98 if ds[k] not in b'anm' or (match and not match(k)):
98 if ds[k] not in b'anm' or (match and not match(k)):
99 del c[k]
99 del c[k]
100 return c
100 return c
101
101
102
102
103 def _computeforwardmissing(a, b, match=None):
103 def _computeforwardmissing(a, b, match=None):
104 """Computes which files are in b but not a.
104 """Computes which files are in b but not a.
105 This is its own function so extensions can easily wrap this call to see what
105 This is its own function so extensions can easily wrap this call to see what
106 files _forwardcopies is about to process.
106 files _forwardcopies is about to process.
107 """
107 """
108 ma = a.manifest()
108 ma = a.manifest()
109 mb = b.manifest()
109 mb = b.manifest()
110 return mb.filesnotin(ma, match=match)
110 return mb.filesnotin(ma, match=match)
111
111
112
112
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage always implies changeset-centric tracing
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
120
120
121
121
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    copies_found = {}

    # Computing the forward missing is quite expensive on large manifests,
    # since it compares the entire manifests. We can optimize it in the common
    # use case of computing what copies are in a commit versus its parent
    # (like during a rebase or histedit). Note, we exclude merge commits from
    # this optimization, since the ctx.files() for a merge commit is not
    # correct for this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        # share one ancestry context across all traced files
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            copies_found[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return copies_found
179
179
180
180
def _revinfo_getter(repo, match):
    """returns a function that returns the following data given a <rev>"

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object (or raw sidedata in the rust path)
    """
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    # A small cache to avoid doing the work twice for merges
    #
    # In the vast majority of cases, if we ask information for a revision
    # about 1 parent, we'll later ask it for the other. So it make sense to
    # keep the information around when reaching the first parent of a merge
    # and dropping it after it was provided for the second parents.
    #
    # It exists cases were only one parent of the merge will be walked. It
    # happens when the "destination" the copy tracing is descendant from a
    # new root, not common with the "source". In that case, we will only walk
    # through merge parents that are descendant of changesets common
    # between "source" and "destination".
    #
    # With the current case implementation if such changesets have a copy
    # information, we'll keep them in memory until the end of
    # _changesetforwardcopies. We don't expect the case to be frequent
    # enough to matters.
    #
    # In addition, it would be possible to reach pathological case, were
    # many first parent are met before any second parent is reached. In
    # that case the cache could grow. If this even become an issue one can
    # safely introduce a maximum cache size. This would trade extra CPU/IO
    # time to save memory.
    merge_caches = {}

    alwaysmatch = match.always()

    if rustmod is not None and alwaysmatch:
        # rust path: hand back the raw sidedata blob

        def revinfo(rev):
            p1, p2 = parents(rev)
            cached = merge_caches.pop(rev, None)
            if cached is not None:
                return cached
            raw = None
            if flags(rev) & HASCOPIESINFO:
                raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
            value = (p1, p2, raw)
            if p1 != node.nullrev and p2 != node.nullrev:
                # XXX some case we over cache, IGNORE
                merge_caches[rev] = value
            return value

    else:
        # pure-python path: hand back a parsed ChangingFiles object

        def revinfo(rev):
            p1, p2 = parents(rev)
            cached = merge_caches.pop(rev, None)
            if cached is not None:
                return cached
            changes = None
            if flags(rev) & HASCOPIESINFO:
                changes = changelogrevision(rev).changes
            value = (p1, p2, changes)
            if p1 != node.nullrev and p2 != node.nullrev:
                # XXX some case we over cache, IGNORE
                merge_caches[rev] = value
            return value

    return revinfo
259
259
260
260
def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor"""
    cache = {}

    def _is_ancestor(anc, desc):
        # revision numbers only grow toward descendants, so a higher
        # revision can never be an ancestor of a lower one
        if anc > desc:
            return False
        if anc == desc:
            return True
        key = (anc, desc)
        cached = cache.get(key)
        if cached is None:
            cached = cache[key] = is_ancestor(anc, desc)
        return cached

    return _is_ancestor
277
277
278
278
def _changesetforwardcopies(a, b, match):
    """Return the {dst: src} copy mapping from changeset ``a`` to ``b``,
    computed from changeset-stored copy information."""
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev
    missing = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    missing_set = set(missing)
    roots = set()
    for rev in missing:
        for parent in cl.parentrevs(rev):
            if parent == node.nullrev:
                continue
            children.setdefault(parent, []).append(rev)
            if parent not in missing_set:
                # parent is on the "common" side: a starting point
                roots.add(parent)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    # revisions lying on a path between a root and b
    reachable = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    walk = set(reachable)
    walk &= missing_set
    walk.update(roots)
    walk.remove(b.rev())
    revs = sorted(walk)

    if repo.filecopiesmode == b'changeset-sidedata':
        revinfo = _revinfo_getter(repo, match)
        return _combine_changeset_copies(
            revs, children, b.rev(), revinfo, match, isancestor
        )
    else:
        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )
326
326
327
327
def _combine_changeset_copies(
    revs, children, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that returns (p1, p2, changes) for a revision,
        where changes is a ChangingFiles object or None
        (the original docstring described the `_extra` variant's contract)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    if rustmod is not None and alwaysmatch:
        return rustmod.combine_changeset_copies(
            list(revs), children, targetrev, revinfo, isancestor
        )

    isancestor = cached_is_ancestor(isancestor)

    all_copies = {}
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        # (the enumerate() index previously bound here was never used)
        for c in children[r]:
            p1, p2, changes = revinfo(c)
            childcopies = {}
            if r == p1:
                parent = 1
                if changes is not None:
                    childcopies = changes.copied_from_p1
            else:
                assert r == p2
                parent = 2
                if changes is not None:
                    childcopies = changes.copied_from_p2
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            if changes is not None and changes.removed:
                if newcopies is copies:
                    newcopies = copies.copy()
                for f in changes.removed:
                    if f in newcopies:
                        if newcopies is copies:
                            # copy on write to avoid affecting potential other
                            # branches. when there are no other branches, this
                            # could be avoided.
                            newcopies = copies.copy()
                        # record the removal; filtered out of the final result
                        newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            elif newcopies is othercopies:
                # nothing to merge:
                pass
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    if newcopies is copies:
                        newcopies = copies.copy()
                    minor, major = othercopies, newcopies
                else:
                    # we do not know if the other dict is a copy or not, so we
                    # need to blindly copy it. Future change should make this
                    # unnecessary.
                    minor, major = newcopies, othercopies.copy()
                copies = _merge_copies_dict(minor, major, isancestor, changes)
                all_copies[c] = copies

    # (c, None) entries mark removals; only keep real copy sources
    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
424
424
425
425
426 def _merge_copies_dict(minor, major, isancestor, changes):
426 def _merge_copies_dict(minor, major, isancestor, changes):
427 """merge two copies-mapping together, minor and major
427 """merge two copies-mapping together, minor and major
428
428
429 In case of conflict, value from "major" will be picked.
429 In case of conflict, value from "major" will be picked.
430
430
431 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
431 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
432 ancestors of `high_rev`,
432 ancestors of `high_rev`,
433
433
434 - `ismerged(path)`: callable return True if `path` have been merged in the
434 - `ismerged(path)`: callable return True if `path` have been merged in the
435 current revision,
435 current revision,
436
436
437 return the resulting dict (in practice, the "minor" object, updated)
437 return the resulting dict (in practice, the "minor" object, updated)
438 """
438 """
439 for dest, value in major.items():
439 for dest, value in major.items():
440 other = minor.get(dest)
440 other = minor.get(dest)
441 if other is None:
441 if other is None:
442 minor[dest] = value
442 minor[dest] = value
443 else:
443 else:
444 new_tt = value[0]
444 new_tt = value[0]
445 other_tt = other[0]
445 other_tt = other[0]
446 if value[1] == other[1]:
446 if value[1] == other[1]:
447 continue
447 continue
448 # content from "major" wins, unless it is older
448 # content from "major" wins, unless it is older
449 # than the branch point or there is a merge
449 # than the branch point or there is a merge
450 if new_tt == other_tt:
450 if new_tt == other_tt:
451 minor[dest] = value
451 minor[dest] = value
452 elif (
452 elif (
453 changes is not None
453 changes is not None
454 and value[1] is None
454 and value[1] is None
455 and dest in changes.salvaged
455 and dest in changes.salvaged
456 ):
456 ):
457 pass
457 pass
458 elif (
458 elif (
459 changes is not None
459 changes is not None
460 and other[1] is None
460 and other[1] is None
461 and dest in changes.salvaged
461 and dest in changes.salvaged
462 ):
462 ):
463 minor[dest] = value
463 minor[dest] = value
464 elif changes is not None and dest in changes.merged:
464 elif changes is not None and dest in changes.merged:
465 minor[dest] = value
465 minor[dest] = value
466 elif not isancestor(new_tt, other_tt):
466 elif not isancestor(new_tt, other_tt):
467 if value[1] is not None:
467 if value[1] is not None:
468 minor[dest] = value
468 minor[dest] = value
469 elif isancestor(other_tt, new_tt):
469 elif isancestor(other_tt, new_tt):
470 minor[dest] = value
470 minor[dest] = value
471 return minor
471 return minor
472
472
473
473
474 def _revinfo_getter_extra(repo):
474 def _revinfo_getter_extra(repo):
475 """return a function that return multiple data given a <rev>"i
475 """return a function that return multiple data given a <rev>"i
476
476
477 * p1: revision number of first parent
477 * p1: revision number of first parent
478 * p2: revision number of first parent
478 * p2: revision number of first parent
479 * p1copies: mapping of copies from p1
479 * p1copies: mapping of copies from p1
480 * p2copies: mapping of copies from p2
480 * p2copies: mapping of copies from p2
481 * removed: a list of removed files
481 * removed: a list of removed files
482 * ismerged: a callback to know if file was merged in that revision
482 * ismerged: a callback to know if file was merged in that revision
483 """
483 """
484 cl = repo.changelog
484 cl = repo.changelog
485 parents = cl.parentrevs
485 parents = cl.parentrevs
486
486
487 def get_ismerged(rev):
487 def get_ismerged(rev):
488 ctx = repo[rev]
488 ctx = repo[rev]
489
489
490 def ismerged(path):
490 def ismerged(path):
491 if path not in ctx.files():
491 if path not in ctx.files():
492 return False
492 return False
493 fctx = ctx[path]
493 fctx = ctx[path]
494 parents = fctx._filelog.parents(fctx._filenode)
494 parents = fctx._filelog.parents(fctx._filenode)
495 nb_parents = 0
495 nb_parents = 0
496 for n in parents:
496 for n in parents:
497 if n != node.nullid:
497 if n != node.nullid:
498 nb_parents += 1
498 nb_parents += 1
499 return nb_parents >= 2
499 return nb_parents >= 2
500
500
501 return ismerged
501 return ismerged
502
502
503 def revinfo(rev):
503 def revinfo(rev):
504 p1, p2 = parents(rev)
504 p1, p2 = parents(rev)
505 ctx = repo[rev]
505 ctx = repo[rev]
506 p1copies, p2copies = ctx._copies
506 p1copies, p2copies = ctx._copies
507 removed = ctx.filesremoved()
507 removed = ctx.filesremoved()
508 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
508 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
509
509
510 return revinfo
510 return revinfo
511
511
512
512
513 def _combine_changeset_copies_extra(
513 def _combine_changeset_copies_extra(
514 revs, children, targetrev, revinfo, match, isancestor
514 revs, children, targetrev, revinfo, match, isancestor
515 ):
515 ):
516 """version of `_combine_changeset_copies` that works with the Google
516 """version of `_combine_changeset_copies` that works with the Google
517 specific "extra" based storage for copy information"""
517 specific "extra" based storage for copy information"""
518 all_copies = {}
518 all_copies = {}
519 alwaysmatch = match.always()
519 alwaysmatch = match.always()
520 for r in revs:
520 for r in revs:
521 copies = all_copies.pop(r, None)
521 copies = all_copies.pop(r, None)
522 if copies is None:
522 if copies is None:
523 # this is a root
523 # this is a root
524 copies = {}
524 copies = {}
525 for i, c in enumerate(children[r]):
525 for i, c in enumerate(children[r]):
526 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
526 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
527 if r == p1:
527 if r == p1:
528 parent = 1
528 parent = 1
529 childcopies = p1copies
529 childcopies = p1copies
530 else:
530 else:
531 assert r == p2
531 assert r == p2
532 parent = 2
532 parent = 2
533 childcopies = p2copies
533 childcopies = p2copies
534 if not alwaysmatch:
534 if not alwaysmatch:
535 childcopies = {
535 childcopies = {
536 dst: src for dst, src in childcopies.items() if match(dst)
536 dst: src for dst, src in childcopies.items() if match(dst)
537 }
537 }
538 newcopies = copies
538 newcopies = copies
539 if childcopies:
539 if childcopies:
540 newcopies = copies.copy()
540 newcopies = copies.copy()
541 for dest, source in pycompat.iteritems(childcopies):
541 for dest, source in pycompat.iteritems(childcopies):
542 prev = copies.get(source)
542 prev = copies.get(source)
543 if prev is not None and prev[1] is not None:
543 if prev is not None and prev[1] is not None:
544 source = prev[1]
544 source = prev[1]
545 newcopies[dest] = (c, source)
545 newcopies[dest] = (c, source)
546 assert newcopies is not copies
546 assert newcopies is not copies
547 for f in removed:
547 for f in removed:
548 if f in newcopies:
548 if f in newcopies:
549 if newcopies is copies:
549 if newcopies is copies:
550 # copy on write to avoid affecting potential other
550 # copy on write to avoid affecting potential other
551 # branches. when there are no other branches, this
551 # branches. when there are no other branches, this
552 # could be avoided.
552 # could be avoided.
553 newcopies = copies.copy()
553 newcopies = copies.copy()
554 newcopies[f] = (c, None)
554 newcopies[f] = (c, None)
555 othercopies = all_copies.get(c)
555 othercopies = all_copies.get(c)
556 if othercopies is None:
556 if othercopies is None:
557 all_copies[c] = newcopies
557 all_copies[c] = newcopies
558 else:
558 else:
559 # we are the second parent to work on c, we need to merge our
559 # we are the second parent to work on c, we need to merge our
560 # work with the other.
560 # work with the other.
561 #
561 #
562 # In case of conflict, parent 1 take precedence over parent 2.
562 # In case of conflict, parent 1 take precedence over parent 2.
563 # This is an arbitrary choice made anew when implementing
563 # This is an arbitrary choice made anew when implementing
564 # changeset based copies. It was made without regards with
564 # changeset based copies. It was made without regards with
565 # potential filelog related behavior.
565 # potential filelog related behavior.
566 if parent == 1:
566 if parent == 1:
567 _merge_copies_dict_extra(
567 _merge_copies_dict_extra(
568 othercopies, newcopies, isancestor, ismerged
568 othercopies, newcopies, isancestor, ismerged
569 )
569 )
570 else:
570 else:
571 _merge_copies_dict_extra(
571 _merge_copies_dict_extra(
572 newcopies, othercopies, isancestor, ismerged
572 newcopies, othercopies, isancestor, ismerged
573 )
573 )
574 all_copies[c] = newcopies
574 all_copies[c] = newcopies
575
575
576 final_copies = {}
576 final_copies = {}
577 for dest, (tt, source) in all_copies[targetrev].items():
577 for dest, (tt, source) in all_copies[targetrev].items():
578 if source is not None:
578 if source is not None:
579 final_copies[dest] = source
579 final_copies[dest] = source
580 return final_copies
580 return final_copies
581
581
582
582
583 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
583 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
584 """version of `_merge_copies_dict` that works with the Google
584 """version of `_merge_copies_dict` that works with the Google
585 specific "extra" based storage for copy information"""
585 specific "extra" based storage for copy information"""
586 for dest, value in major.items():
586 for dest, value in major.items():
587 other = minor.get(dest)
587 other = minor.get(dest)
588 if other is None:
588 if other is None:
589 minor[dest] = value
589 minor[dest] = value
590 else:
590 else:
591 new_tt = value[0]
591 new_tt = value[0]
592 other_tt = other[0]
592 other_tt = other[0]
593 if value[1] == other[1]:
593 if value[1] == other[1]:
594 continue
594 continue
595 # content from "major" wins, unless it is older
595 # content from "major" wins, unless it is older
596 # than the branch point or there is a merge
596 # than the branch point or there is a merge
597 if (
597 if (
598 new_tt == other_tt
598 new_tt == other_tt
599 or not isancestor(new_tt, other_tt)
599 or not isancestor(new_tt, other_tt)
600 or ismerged(dest)
600 or ismerged(dest)
601 ):
601 ):
602 minor[dest] = value
602 minor[dest] = value
603
603
604
604
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
    if base is None:
        base = a
    # restrict the search to files covered by the repository's narrowspec
    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        return _committedforwardcopies(a, b, base, match)
    # b is the working copy: trace copies up to its first parent, then
    # layer the dirstate copies on top if necessary
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
619
619
620
620
def _backwardrenames(a, b, match):
    """find renames from b back to a, i.e. {src@a: dst@b} reversed

    Copies (where the source still exists in `a`) are dropped; only true
    renames are reported.
    """
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    renames = {}
    for fdst, fsrc in sorted(pycompat.iteritems(forward)):
        if match and not match(fsrc):
            continue
        # remove copies: the source surviving in `a` means it was copied,
        # not renamed
        if fsrc in a:
            continue
        renames[fsrc] = fdst
    return renames
641
641
642
642
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    # trivial cases: identical endpoints or an empty side
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: trace forward only
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        # y is an ancestor of x: trace backward only
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # x and y diverge: go back to the common ancestor, then forward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
679
679
680
680
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    return _fullcopytracing(repo, c1, c2, base)
749
749
750
750
751 def _isfullcopytraceable(repo, c1, base):
751 def _isfullcopytraceable(repo, c1, base):
752 """Checks that if base, source and destination are all no-public branches,
752 """Checks that if base, source and destination are all no-public branches,
753 if yes let's use the full copytrace algorithm for increased capabilities
753 if yes let's use the full copytrace algorithm for increased capabilities
754 since it will be fast enough.
754 since it will be fast enough.
755
755
756 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
756 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
757 number of changesets from c1 to base such that if number of changesets are
757 number of changesets from c1 to base such that if number of changesets are
758 more than the limit, full copytracing algorithm won't be used.
758 more than the limit, full copytracing algorithm won't be used.
759 """
759 """
760 if c1.rev() is None:
760 if c1.rev() is None:
761 c1 = c1.p1()
761 c1 = c1.p1()
762 if c1.mutable() and base.mutable():
762 if c1.mutable() and base.mutable():
763 sourcecommitlimit = repo.ui.configint(
763 sourcecommitlimit = repo.ui.configint(
764 b'experimental', b'copytrace.sourcecommitlimit'
764 b'experimental', b'copytrace.sourcecommitlimit'
765 )
765 )
766 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
766 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
767 return commits < sourcecommitlimit
767 return commits < sourcecommitlimit
768 return False
768 return False
769
769
770
770
771 def _checksinglesidecopies(
771 def _checksinglesidecopies(
772 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
772 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
773 ):
773 ):
774 if src not in m2:
774 if src not in m2:
775 # deleted on side 2
775 # deleted on side 2
776 if src not in m1:
776 if src not in m1:
777 # renamed on side 1, deleted on side 2
777 # renamed on side 1, deleted on side 2
778 renamedelete[src] = dsts1
778 renamedelete[src] = dsts1
779 elif src not in mb:
779 elif src not in mb:
780 # Work around the "short-circuit to avoid issues with merge states"
780 # Work around the "short-circuit to avoid issues with merge states"
781 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
781 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
782 # destination doesn't exist in y.
782 # destination doesn't exist in y.
783 pass
783 pass
784 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
784 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
785 return
785 return
786 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
786 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
787 # modified on side 2
787 # modified on side 2
788 for dst in dsts1:
788 for dst in dsts1:
789 copy[dst] = src
789 copy[dst] = src
790
790
791
791
class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # default each mapping to a fresh empty dict (a mutable default
        # argument would be shared between instances)
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        return '<branch_copies\n  copy=%r\n  renamedelete=%r\n  dirmove=%r\n  movewithdir=%r\n>' % (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
826
826
827
827
def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        # no copies on either side, nothing to report
        return branch_copies(), branch_copies(), {}

    # invert dst -> src into src -> [dsts] for each side
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests, lazily: computing
    # filesnotin() is expensive and only needed for dir move detection
    # (and debug output), so defer it until first requested
    cache = []

    def _get_addedfiles(idx):
        if not cache:
            addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
            addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
            u1 = sorted(addedinm1 - addedinm2)
            u2 = sorted(addedinm2 - addedinm1)
            cache.extend((u1, u2))
        return cache[idx]

    u1fn = lambda: _get_addedfiles(0)
    u2fn = lambda: _get_addedfiles(1)
    if repo.ui.debugflag:
        u1 = u1fn()
        u2 = u2fn()

        header = b"  unmatched files in %s"
        if u1:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1))
            )
        if u2:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2))
            )

    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete1.values():
        renamedeleteset.update(dsts)
    for dsts in renamedelete2.values():
        renamedeleteset.update(dsts)

    repo.ui.debug(
        b"  all copies found (* = to merge, ! = divergent, "
        b"% = renamed and deleted):\n"
    )
    for side, copies in ((b"local", copies1), (b"remote", copies2)):
        if not copies:
            continue
        repo.ui.debug(b"  on %s side:\n" % side)
        for f in sorted(copies):
            note = b""
            if f in copy1 or f in copy2:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b"   src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
            )
    del renamedeleteset
    del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge
951
966
952
967
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfilesfn: function returning added files on the other side (compared to
    ctx)
    """
    # generate a directory move map
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    # prune every candidate that turned out to be invalid
    for baddir in invalid:
        dirmove.pop(baddir, None)
    del invalid

    if not dirmove:
        return {}, {}

    # normalize to trailing-slash form so prefix matching below is exact
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfilesfn():
        if f in fullcopy:
            continue
        for d in dirmove:
            if not f.startswith(d):
                continue
            # new file added in a directory that was moved, move it
            df = dirmove[d] + f[len(d) :]
            if df not in copy:
                movewithdir[f] = df
                repo.ui.debug(
                    b"   pending file src: '%s' -> dst: '%s'\n"
                    % (f, df)
                )
            break

    return dirmove, movewithdir
1015
1031
1016
1032
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # The heuristics work on committed revisions only; substitute the first
    # parent for a working-copy context (whose rev() is None).
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not an ancestor of c2, fall back to the exact algorithm.
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # Walk the linear chain c2..base collecting touched files; a merge
    # commit anywhere on the way disqualifies the heuristics.
    cursor = c2
    while cursor != base:
        if len(cursor.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(cursor.files())
        cursor = cursor.p1()

    # Copies recorded on the c2 side whose source file still exists in c1.
    copies2 = {}
    for dst, src in pycompat.iteritems(_forwardcopies(base, c2)):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = [
        f for f in changedfiles if f not in m1 and f in base and f in c2
    ]

    copies1 = {}
    if missingfiles:
        # Index the files added on the c1 side by basename and by dirname so
        # rename candidates can be looked up cheaply per missing file.
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for added in m1.filesnotin(base.manifest()):
            basenametofilename[os.path.basename(added)].append(added)
            dirnametofilename[os.path.dirname(added)].append(added)

        for f in missingfiles:
            candidates = (
                basenametofilename[os.path.basename(f)]
                + dirnametofilename[os.path.dirname(f)]
            )
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(candidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(candidates))
                )
                continue

            for candidate in candidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1124
1140
1125
1141
1126 def _related(f1, f2):
1142 def _related(f1, f2):
1127 """return True if f1 and f2 filectx have a common ancestor
1143 """return True if f1 and f2 filectx have a common ancestor
1128
1144
1129 Walk back to common ancestor to see if the two files originate
1145 Walk back to common ancestor to see if the two files originate
1130 from the same file. Since workingfilectx's rev() is None it messes
1146 from the same file. Since workingfilectx's rev() is None it messes
1131 up the integer comparison logic, hence the pre-step check for
1147 up the integer comparison logic, hence the pre-step check for
1132 None (f1 and f2 can only be workingfilectx's initially).
1148 None (f1 and f2 can only be workingfilectx's initially).
1133 """
1149 """
1134
1150
1135 if f1 == f2:
1151 if f1 == f2:
1136 return True # a match
1152 return True # a match
1137
1153
1138 g1, g2 = f1.ancestors(), f2.ancestors()
1154 g1, g2 = f1.ancestors(), f2.ancestors()
1139 try:
1155 try:
1140 f1r, f2r = f1.linkrev(), f2.linkrev()
1156 f1r, f2r = f1.linkrev(), f2.linkrev()
1141
1157
1142 if f1r is None:
1158 if f1r is None:
1143 f1 = next(g1)
1159 f1 = next(g1)
1144 if f2r is None:
1160 if f2r is None:
1145 f2 = next(g2)
1161 f2 = next(g2)
1146
1162
1147 while True:
1163 while True:
1148 f1r, f2r = f1.linkrev(), f2.linkrev()
1164 f1r, f2r = f1.linkrev(), f2.linkrev()
1149 if f1r > f2r:
1165 if f1r > f2r:
1150 f1 = next(g1)
1166 f1 = next(g1)
1151 elif f2r > f1r:
1167 elif f2r > f1r:
1152 f2 = next(g2)
1168 f2 = next(g2)
1153 else: # f1 and f2 point to files in the same linkrev
1169 else: # f1 and f2 point to files in the same linkrev
1154 return f1 == f2 # true if they point to the same file
1170 return f1 == f2 # true if they point to the same file
1155 except StopIteration:
1171 except StopIteration:
1156 return False
1172 return False
1157
1173
1158
1174
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    # All copies recorded between base and ctx.
    found = pathcopies(base, ctx)
    # Drop entries that _filter() deems invalid for the wctx.p1() -> wctx
    # transition before applying them.
    _filter(wctx.p1(), wctx, found)
    # Record each surviving copy directly on the working-copy file contexts.
    for dest, source in pycompat.iteritems(found):
        wctx[dest].markcopied(source)
General Comments 0
You need to be logged in to leave comments. Login now