upstream/mercurial-mirror Commit - r46600:f9f8d8aa

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import collections

10

import collections

11

import os

11

import os

12

13

from .i18n import _

13

from .i18n import _

14

15

16

from . import (

16

from . import (

17

match as matchmod,

17

match as matchmod,

18

node,

18

node,

19

pathutil,

19

pathutil,

20

policy,

20

policy,

21

pycompat,

21

pycompat,

22

util,

22

util,

23

)

23

)

24

25

26

from .utils import stringutil

26

from .utils import stringutil

27

28

from .revlogutils import flagutil

28

from .revlogutils import flagutil

29

30

rustmod = policy.importrust("copy_tracing")

30

rustmod = policy.importrust("copy_tracing")

31

32

33

def _filter(src, dst, t):

33

def _filter(src, dst, t):

34

"""filters out invalid copies after chaining"""

34

"""filters out invalid copies after chaining"""

35

36

# When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')

36

# When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')

37

# with copies in 'b' (from 'mid' to 'dst'), we can get the different cases

37

# with copies in 'b' (from 'mid' to 'dst'), we can get the different cases

38

# in the following table (not including trivial cases). For example, case 2

38

# in the following table (not including trivial cases). For example, case 2

39

# is where a file existed in 'src' and remained under that name in 'mid' and

39

# is where a file existed in 'src' and remained under that name in 'mid' and

40

# then was renamed between 'mid' and 'dst'.

40

# then was renamed between 'mid' and 'dst'.

41

#

41

#

42

# case src mid dst result

42

# case src mid dst result

43

# 1 x y - -

43

# 1 x y - -

44

# 2 x y y x->y

44

# 2 x y y x->y

45

# 3 x y x -

45

# 3 x y x -

46

# 4 x y z x->z

46

# 4 x y z x->z

47

# 5 - x y -

47

# 5 - x y -

48

# 6 x x y x->y

48

# 6 x x y x->y

49

#

49

#

50

# _chain() takes care of chaining the copies in 'a' and 'b', but it

50

# _chain() takes care of chaining the copies in 'a' and 'b', but it

51

# cannot tell the difference between cases 1 and 2, between 3 and 4, or

51

# cannot tell the difference between cases 1 and 2, between 3 and 4, or

52

# between 5 and 6, so it includes all cases in its result.

52

# between 5 and 6, so it includes all cases in its result.

53

# Cases 1, 3, and 5 are then removed by _filter().

53

# Cases 1, 3, and 5 are then removed by _filter().

54

55

for k, v in list(t.items()):

55

for k, v in list(t.items()):

56

# remove copies from files that didn't exist

56

# remove copies from files that didn't exist

57

if v not in src:

57

if v not in src:

58

del t[k]

58

del t[k]

59

# remove criss-crossed copies

59

# remove criss-crossed copies

60

elif k in src and v in dst:

60

elif k in src and v in dst:

61

del t[k]

61

del t[k]

62

# remove copies to files that were then removed

62

# remove copies to files that were then removed

63

elif k not in dst:

63

elif k not in dst:

64

del t[k]

64

del t[k]

65

66

67

def _chain(prefix, suffix):

67

def _chain(prefix, suffix):

68

"""chain two sets of copies 'prefix' and 'suffix'"""

68

"""chain two sets of copies 'prefix' and 'suffix'"""

69

result = prefix.copy()

69

result = prefix.copy()

70

for key, value in pycompat.iteritems(suffix):

70

for key, value in pycompat.iteritems(suffix):

71

result[key] = prefix.get(value, value)

71

result[key] = prefix.get(value, value)

72

return result

72

return result

73

74

75

def _tracefile(fctx, am, basemf):

75

def _tracefile(fctx, am, basemf):

76

"""return file context that is the ancestor of fctx present in ancestor

76

"""return file context that is the ancestor of fctx present in ancestor

77

manifest am

77

manifest am

78

79

Note: we used to try and stop after a given limit, however checking if that

79

Note: we used to try and stop after a given limit, however checking if that

80

limit is reached turned out to be very expensive. we are better off

80

limit is reached turned out to be very expensive. we are better off

81

disabling that feature."""

81

disabling that feature."""

82

83

for f in fctx.ancestors():

83

for f in fctx.ancestors():

84

path = f.path()

84

path = f.path()

85

if am.get(path, None) == f.filenode():

85

if am.get(path, None) == f.filenode():

86

return path

86

return path

87

if basemf and basemf.get(path, None) == f.filenode():

87

if basemf and basemf.get(path, None) == f.filenode():

88

return path

88

return path

89

90

91

def _dirstatecopies(repo, match=None):

91

def _dirstatecopies(repo, match=None):

92

ds = repo.dirstate

92

ds = repo.dirstate

93

c = ds.copies().copy()

93

c = ds.copies().copy()

94

for k in list(c):

94

for k in list(c):

95

if ds[k] not in b'anm' or (match and not match(k)):

95

if ds[k] not in b'anm' or (match and not match(k)):

96

del c[k]

96

del c[k]

97

return c

97

return c

98

99

100

def _computeforwardmissing(a, b, match=None):

100

def _computeforwardmissing(a, b, match=None):

101

"""Computes which files are in b but not a.

101

"""Computes which files are in b but not a.

102

This is its own function so extensions can easily wrap this call to see what

102

This is its own function so extensions can easily wrap this call to see what

103

files _forwardcopies is about to process.

103

files _forwardcopies is about to process.

104

"""

104

"""

105

ma = a.manifest()

105

ma = a.manifest()

106

mb = b.manifest()

106

mb = b.manifest()

107

return mb.filesnotin(ma, match=match)

107

return mb.filesnotin(ma, match=match)

108

109

110

def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms

    True when the repository stores copies in changeset sidedata, or when
    the `experimental.copies.read-from` config is `changeset-only` or
    `compatibility`.
    """
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource

117

118

119

def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {destination: source} dict for the files that are in `b` but
    not in `a`. `base` may be None; when given, its manifest is also used by
    _tracefile() as a place where tracing may stop.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        # share one ancestry context between all the traced files
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            # `start` is always bound here: it is set under the same flag
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm

176

177

178

def _revinfo_getter(repo):
    """returns a function that returns the following data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object, or None when the revision is not
               flagged as carrying copies information
    """
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    # A small cache to avoid doing the work twice for merges
    #
    # In the vast majority of cases, if we ask information for a revision
    # about 1 parent, we'll later ask it for the other. So it make sense to
    # keep the information around when reaching the first parent of a merge
    # and dropping it after it was provided for the second parents.
    #
    # It exists cases were only one parent of the merge will be walked. It
    # happens when the "destination" the copy tracing is descendant from a
    # new root, not common with the "source". In that case, we will only walk
    # through merge parents that are descendant of changesets common
    # between "source" and "destination".
    #
    # With the current case implementation if such changesets have a copy
    # information, we'll keep them in memory until the end of
    # _changesetforwardcopies. We don't expect the case to be frequent
    # enough to matters.
    #
    # In addition, it would be possible to reach pathological case, were
    # many first parent are met before any second parent is reached. In
    # that case the cache could grow. If this even become an issue one can
    # safely introduce a maximum cache size. This would trade extra CPU/IO
    # time to save memory.
    merge_caches = {}

    def revinfo(rev):
        # a cached entry is handed out once and dropped: it was stored on the
        # first request for a merge and this is the second one
        e = merge_caches.pop(rev, None)
        if e is not None:
            return e
        p1, p2 = parents(rev)
        changes = None
        if flags(rev) & HASCOPIESINFO:
            changes = changelogrevision(rev).changes
        value = (p1, p2, changes)
        if p1 != node.nullrev and p2 != node.nullrev:
            # XXX some case we over cache, IGNORE
            merge_caches[rev] = value
        return value

    return revinfo

234

235

236

def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor

    The returned callable takes `(anc, desc)`. It answers False when
    `anc > desc` and True when `anc == desc` without calling `is_ancestor`;
    any other pair is delegated to `is_ancestor` once and then remembered.
    """
    cache = {}

    def _is_ancestor(anc, desc):
        if anc > desc:
            return False
        elif anc == desc:
            return True
        key = (anc, desc)
        ret = cache.get(key)
        # None means "not computed yet"; a cached False is a valid answer
        if ret is None:
            ret = cache[key] = is_ancestor(anc, desc)
        return ret

    return _is_ancestor

252

253

254

def _changesetforwardcopies(a, b, match):
    """compute the copies from `a` to `b` using changeset-level copy data

    Returns an empty dict when `a` is the null revision, when `a` and `b` are
    the same revision, or when no revision is found to track copies from.
    Otherwise the revisions to visit and a {parent: [children]} mapping are
    built from the changelog and handed over to the combining function that
    fits the repository's copy storage.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)
            # a parent outside of the missing set is where tracing starts
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    # `b` is the target: it is only reached as a child, never iterated from
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)

    if repo.filecopiesmode == b'changeset-sidedata':
        revinfo = _revinfo_getter(repo)
        return _combine_changeset_copies(
            revs, children, b.rev(), revinfo, match, isancestor
        )
    else:
        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )

301

302

303

def _combine_changeset_copies(
    revs, children, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that return (p1, p2, changes); `changes` is
                  either None or an object exposing `copied_from_p1`,
                  `copied_from_p2` and `removed`
    match: a matcher
    isancestor(low_rev, high_rev): a function telling if `low_rev` is an
                                   ancestor of `high_rev`

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    if rustmod is not None and alwaysmatch:
        return rustmod.combine_changeset_copies(
            list(revs), children, targetrev, revinfo, isancestor
        )

    isancestor = cached_is_ancestor(isancestor)

    # {rev: {dest: (rev that last touched dest, source or None)}}
    all_copies = {}
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for c in children[r]:
            p1, p2, changes = revinfo(c)
            childcopies = {}
            if r == p1:
                parent = 1
                if changes is not None:
                    childcopies = changes.copied_from_p1
            else:
                assert r == p2
                parent = 2
                if changes is not None:
                    childcopies = changes.copied_from_p2
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in childcopies.items():
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            if changes is not None and changes.removed:
                if newcopies is copies:
                    # copy on write to avoid affecting potential other
                    # branches. when there are no other branches, this
                    # could be avoided.
                    newcopies = copies.copy()
                for f in changes.removed:
                    if f in newcopies:
                        # a None source records that the file was removed
                        newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            elif newcopies is othercopies:
                # nothing to merge:
                pass
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    minor, major = othercopies, newcopies
                else:
                    minor, major = newcopies, othercopies
                # The merge updates `minor` in place. The result must not be
                # bound to `copies`: that name still holds the mapping of `r`
                # that the remaining children of `r` have to start from.
                _merge_copies_dict(minor, major, isancestor, changes)
                all_copies[c] = minor

    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies

394

395

396

def _merge_copies_dict(minor, major, isancestor, changes):

396

def _merge_copies_dict(minor, major, isancestor, changes):

397

"""merge two copies-mapping together, minor and major

397

"""merge two copies-mapping together, minor and major

398

399

In case of conflict, value from "major" will be picked.

399

In case of conflict, value from "major" will be picked.

400

401

- `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an

401

- `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an

402

ancestors of `high_rev`,

402

ancestors of `high_rev`,

403

404

- `ismerged(path)`: callable return True if `path` have been merged in the

404

- `ismerged(path)`: callable return True if `path` have been merged in the

405

current revision,

405

current revision,

406

407

return the resulting dict (in practice, the "minor" object, updated)

406

"""

408

"""

407

for dest, value in major.items():

409

for dest, value in major.items():

408

other = minor.get(dest)

410

other = minor.get(dest)

409

if other is None:

411

if other is None:

410

minor[dest] = value

412

minor[dest] = value

411

else:

413

else:

412

new_tt = value[0]

414

new_tt = value[0]

413

other_tt = other[0]

415

other_tt = other[0]

414

if value[1] == other[1]:

416

if value[1] == other[1]:

415

continue

417

continue

416

# content from "major" wins, unless it is older

418

# content from "major" wins, unless it is older

417

# than the branch point or there is a merge

419

# than the branch point or there is a merge

418

if new_tt == other_tt:

420

if new_tt == other_tt:

419

minor[dest] = value

421

minor[dest] = value

420

elif (

422

elif (

421

changes is not None

423

changes is not None

422

and value[1] is None

424

and value[1] is None

423

and dest in changes.salvaged

425

and dest in changes.salvaged

424

):

426

):

425

pass

427

pass

426

elif (

428

elif (

427

changes is not None

429

changes is not None

428

and other[1] is None

430

and other[1] is None

429

and dest in changes.salvaged

431

and dest in changes.salvaged

430

):

432

):

431

minor[dest] = value

433

minor[dest] = value

432

elif changes is not None and dest in changes.merged:

434

elif changes is not None and dest in changes.merged:

433

minor[dest] = value

435

minor[dest] = value

434

elif not isancestor(new_tt, other_tt):

436

elif not isancestor(new_tt, other_tt):

435

if value[1] is not None:

437

if value[1] is not None:

436

minor[dest] = value

438

minor[dest] = value

437

elif isancestor(other_tt, new_tt):

439

elif isancestor(other_tt, new_tt):

438

minor[dest] = value

440

minor[dest] = value

441

return minor

439

442

440

443

441

def _revinfo_getter_extra(repo):

444

def _revinfo_getter_extra(repo):

442

"""return a function that return multiple data given a <rev>"i

445

"""return a function that return multiple data given a <rev>"i

443

446

444

* p1: revision number of first parent

447

* p1: revision number of first parent

445

* p2: revision number of first parent

448

* p2: revision number of first parent

446

* p1copies: mapping of copies from p1

449

* p1copies: mapping of copies from p1

447

* p2copies: mapping of copies from p2

450

* p2copies: mapping of copies from p2

448

* removed: a list of removed files

451

* removed: a list of removed files

449

* ismerged: a callback to know if file was merged in that revision

452

* ismerged: a callback to know if file was merged in that revision

450

"""

453

"""

451

cl = repo.changelog

454

cl = repo.changelog

452

parents = cl.parentrevs

455

parents = cl.parentrevs

453

456

454

def get_ismerged(rev):

457

def get_ismerged(rev):

455

ctx = repo[rev]

458

ctx = repo[rev]

456

459

457

def ismerged(path):

460

def ismerged(path):

458

if path not in ctx.files():

461

if path not in ctx.files():

459

return False

462

return False

460

fctx = ctx[path]

463

fctx = ctx[path]

461

parents = fctx._filelog.parents(fctx._filenode)

464

parents = fctx._filelog.parents(fctx._filenode)

462

nb_parents = 0

465

nb_parents = 0

463

for n in parents:

466

for n in parents:

464

if n != node.nullid:

467

if n != node.nullid:

465

nb_parents += 1

468

nb_parents += 1

466

return nb_parents >= 2

469

return nb_parents >= 2

467

470

468

return ismerged

471

return ismerged

469

472

470

def revinfo(rev):

473

def revinfo(rev):

471

p1, p2 = parents(rev)

474

p1, p2 = parents(rev)

472

ctx = repo[rev]

475

ctx = repo[rev]

473

p1copies, p2copies = ctx._copies

476

p1copies, p2copies = ctx._copies

474

removed = ctx.filesremoved()

477

removed = ctx.filesremoved()

475

return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

478

return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

476

479

477

return revinfo

480

return revinfo

478

481

479

482

480

def _combine_changeset_copies_extra(

483

def _combine_changeset_copies_extra(

481

revs, children, targetrev, revinfo, match, isancestor

484

revs, children, targetrev, revinfo, match, isancestor

482

):

485

):

483

"""version of `_combine_changeset_copies` that works with the Google

486

"""version of `_combine_changeset_copies` that works with the Google

484

specific "extra" based storage for copy information"""

487

specific "extra" based storage for copy information"""

485

all_copies = {}

488

all_copies = {}

486

alwaysmatch = match.always()

489

alwaysmatch = match.always()

487

for r in revs:

490

for r in revs:

488

copies = all_copies.pop(r, None)

491

copies = all_copies.pop(r, None)

489

if copies is None:

492

if copies is None:

490

# this is a root

493

# this is a root

491

copies = {}

494

copies = {}

492

for i, c in enumerate(children[r]):

495

for i, c in enumerate(children[r]):

493

p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)

496

p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)

494

if r == p1:

497

if r == p1:

495

parent = 1

498

parent = 1

496

childcopies = p1copies

499

childcopies = p1copies

497

else:

500

else:

498

assert r == p2

501

assert r == p2

499

parent = 2

502

parent = 2

500

childcopies = p2copies

503

childcopies = p2copies

501

if not alwaysmatch:

504

if not alwaysmatch:

502

childcopies = {

505

childcopies = {

503

dst: src for dst, src in childcopies.items() if match(dst)

506

dst: src for dst, src in childcopies.items() if match(dst)

504

}

507

}

505

newcopies = copies

508

newcopies = copies

506

if childcopies:

509

if childcopies:

507

newcopies = copies.copy()

510

newcopies = copies.copy()

508

for dest, source in pycompat.iteritems(childcopies):

511

for dest, source in pycompat.iteritems(childcopies):

509

prev = copies.get(source)

512

prev = copies.get(source)

510

if prev is not None and prev[1] is not None:

513

if prev is not None and prev[1] is not None:

511

source = prev[1]

514

source = prev[1]

512

newcopies[dest] = (c, source)

515

newcopies[dest] = (c, source)

513

assert newcopies is not copies

516

assert newcopies is not copies

514

for f in removed:

517

for f in removed:

515

if f in newcopies:

518

if f in newcopies:

516

if newcopies is copies:

519

if newcopies is copies:

517

# copy on write to avoid affecting potential other

520

# copy on write to avoid affecting potential other

518

# branches. when there are no other branches, this

521

# branches. when there are no other branches, this

519

# could be avoided.

522

# could be avoided.

520

newcopies = copies.copy()

523

newcopies = copies.copy()

521

newcopies[f] = (c, None)

524

newcopies[f] = (c, None)

522

othercopies = all_copies.get(c)

525

othercopies = all_copies.get(c)

523

if othercopies is None:

526

if othercopies is None:

524

all_copies[c] = newcopies

527

all_copies[c] = newcopies

525

else:

528

else:

526

# we are the second parent to work on c, we need to merge our

529

# we are the second parent to work on c, we need to merge our

527

# work with the other.

530

# work with the other.

528

#

531

#

529

# In case of conflict, parent 1 take precedence over parent 2.

532

# In case of conflict, parent 1 take precedence over parent 2.

530

# This is an arbitrary choice made anew when implementing

533

# This is an arbitrary choice made anew when implementing

531

# changeset based copies. It was made without regards with

534

# changeset based copies. It was made without regards with

532

# potential filelog related behavior.

535

# potential filelog related behavior.

533

if parent == 1:

536

if parent == 1:

534

_merge_copies_dict_extra(

537

_merge_copies_dict_extra(

535

othercopies, newcopies, isancestor, ismerged

538

othercopies, newcopies, isancestor, ismerged

536

)

539

)

537

else:

540

else:

538

_merge_copies_dict_extra(

541

_merge_copies_dict_extra(

539

newcopies, othercopies, isancestor, ismerged

542

newcopies, othercopies, isancestor, ismerged

540

)

543

)

541

all_copies[c] = newcopies

544

all_copies[c] = newcopies

542

545

543

final_copies = {}

546

final_copies = {}

544

for dest, (tt, source) in all_copies[targetrev].items():

547

for dest, (tt, source) in all_copies[targetrev].items():

545

if source is not None:

548

if source is not None:

546

final_copies[dest] = source

549

final_copies[dest] = source

547

return final_copies

550

return final_copies

548

551

549

552

550

def _merge_copies_dict_extra(minor, major, isancestor, ismerged):

553

def _merge_copies_dict_extra(minor, major, isancestor, ismerged):

551

"""version of `_merge_copies_dict` that works with the Google

554

"""version of `_merge_copies_dict` that works with the Google

552

specific "extra" based storage for copy information"""

555

specific "extra" based storage for copy information"""

553

for dest, value in major.items():

556

for dest, value in major.items():

554

other = minor.get(dest)

557

other = minor.get(dest)

555

if other is None:

558

if other is None:

556

minor[dest] = value

559

minor[dest] = value

557

else:

560

else:

558

new_tt = value[0]

561

new_tt = value[0]

559

other_tt = other[0]

562

other_tt = other[0]

560

if value[1] == other[1]:

563

if value[1] == other[1]:

561

continue

564

continue

562

# content from "major" wins, unless it is older

565

# content from "major" wins, unless it is older

563

# than the branch point or there is a merge

566

# than the branch point or there is a merge

564

if (

567

if (

565

new_tt == other_tt

568

new_tt == other_tt

566

or not isancestor(new_tt, other_tt)

569

or not isancestor(new_tt, other_tt)

567

or ismerged(dest)

570

or ismerged(dest)

568

):

571

):

569

minor[dest] = value

572

minor[dest] = value

570

573

571

574

572

def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b

    `base` defaults to `a`. `match` is narrowed through the repository's
    narrowmatch before being used.
    """
    base = a if base is None else base
    match = a.repo().narrowmatch(match)

    if b.rev() is not None:
        # plain committed revision
        return _committedforwardcopies(a, b, base, match)

    # b is the working copy: trace up to its first parent, then combine
    # copies from dirstate
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))

586

589

587

590

588

def _backwardrenames(a, b, match):
    """find {src: dst} renames by reversing the forward copies from b to a

    Returns an empty dict when `experimental.copytrace` is `off`. Sources
    rejected by `match` (when one is given) and sources still present in
    `a` (plain copies rather than renames) are left out.
    """
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    renames = {}
    for dst, src in sorted(pycompat.iteritems(forward)):
        # keep the entry only if the source passes the matcher and is gone
        # from `a` (otherwise it was a copy, not a rename)
        if (not match or match(src)) and src not in a:
            renames[src] = dst
    return renames

608

611

609

612

610

def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    Picks a search mode (dirstate, forward, backward or combined) from how
    `x` and `y` relate through their common ancestor. When both
    `ui.debugflag` and `devel.debug.copies` are set, the chosen mode is
    written to the debug output.
    """
    repo = x._repo
    ui = repo.ui
    debug = ui.debugflag and ui.configbool(b'devel', b'debug.copies')

    def trace(message):
        # emit a debug line only when copy debugging is enabled
        if debug:
            ui.debug(message)

    if debug:
        # formatted lazily: x and y are only rendered when debugging
        ui.debug(b'debug.copies: searching copies from %s to %s\n' % (x, y))

    if x == y or not x or not y:
        return {}

    if y.rev() is None and x == y.p1():
        trace(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)

    a = y.ancestor(x)
    if a == x:
        trace(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        trace(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        trace(b'debug.copies: search mode: combined\n')
        base = x if a.rev() != node.nullrev else None
        backward = _backwardrenames(x, a, match=match)
        forward = _forwardcopies(a, y, base, match=match)
        copies = _chain(backward, forward)
    _filter(x, y, copies)
    return copies

646

649

647

650

648

def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

      o          ---> 4 another commit
      |
      |   o      ---> 3 commit that modifies a.txt
      |  /
      o /        ---> 2 commit that moves a.txt to b.txt
      |/
      o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns a 3-tuple made of two "branch_copies" instances followed by
    "diverge":

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        dirstate_side = branch_copies(_dirstatecopies(repo, narrowmatch))
        return dirstate_side, branch_copies(), {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    #
    # Even in heuristics mode, do full copytracing if only non-public
    # revisions are involved as that will be fast enough and will also cover
    # the copies which could be missed by heuristics
    use_heuristics = copytracing == b'heuristics' and not _isfullcopytraceable(
        repo, c1, base
    )
    if use_heuristics:
        return _heuristicscopytracing(repo, c1, c2, base)
    return _fullcopytracing(repo, c1, c2, base)

716

719

717

720

718

def _isfullcopytraceable(repo, c1, base):

721

def _isfullcopytraceable(repo, c1, base):

719

"""Checks that if base, source and destination are all no-public branches,

722

"""Checks that if base, source and destination are all no-public branches,

720

if yes let's use the full copytrace algorithm for increased capabilities

723

if yes let's use the full copytrace algorithm for increased capabilities

721

since it will be fast enough.

724

since it will be fast enough.

722

725

723

`experimental.copytrace.sourcecommitlimit` can be used to set a limit for

726

`experimental.copytrace.sourcecommitlimit` can be used to set a limit for

724

number of changesets from c1 to base such that if number of changesets are

727

number of changesets from c1 to base such that if number of changesets are

725

more than the limit, full copytracing algorithm won't be used.

728

more than the limit, full copytracing algorithm won't be used.

726

"""

729

"""

727

if c1.rev() is None:

730

if c1.rev() is None:

728

c1 = c1.p1()

731

c1 = c1.p1()

729

if c1.mutable() and base.mutable():

732

if c1.mutable() and base.mutable():

730

sourcecommitlimit = repo.ui.configint(

733

sourcecommitlimit = repo.ui.configint(

731

b'experimental', b'copytrace.sourcecommitlimit'

734

b'experimental', b'copytrace.sourcecommitlimit'

732

)

735

)

733

commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))

736

commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))

734

return commits < sourcecommitlimit

737

return commits < sourcecommitlimit

735

return False

738

return False

736

739

737

740

738

def _checksinglesidecopies(

741

def _checksinglesidecopies(

739

src, dsts1, m1, m2, mb, c2, base, copy, renamedelete

742

src, dsts1, m1, m2, mb, c2, base, copy, renamedelete

740

):

743

):

741

if src not in m2:

744

if src not in m2:

742

# deleted on side 2

745

# deleted on side 2

743

if src not in m1:

746

if src not in m1:

744

# renamed on side 1, deleted on side 2

747

# renamed on side 1, deleted on side 2

745

renamedelete[src] = dsts1

748

renamedelete[src] = dsts1

746

elif src not in mb:

749

elif src not in mb:

747

# Work around the "short-circuit to avoid issues with merge states"

750

# Work around the "short-circuit to avoid issues with merge states"

748

# thing in pathcopies(): pathcopies(x, y) can return a copy where the

751

# thing in pathcopies(): pathcopies(x, y) can return a copy where the

749

# destination doesn't exist in y.

752

# destination doesn't exist in y.

750

pass

753

pass

751

elif mb[src] != m2[src] and not _related(c2[src], base[src]):

754

elif mb[src] != m2[src] and not _related(c2[src], base[src]):

752

return

755

return

753

elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):

756

elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):

754

# modified on side 2

757

# modified on side 2

755

for dst in dsts1:

758

for dst in dsts1:

756

copy[dst] = src

759

copy[dst] = src

757

760

758

761

759

class branch_copies(object):
    """Copy information gathered for one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # every omitted mapping gets its own fresh dict; a mapping passed in
        # by the caller is stored as is, not copied
        if copy is None:
            copy = {}
        if renamedelete is None:
            renamedelete = {}
        if dirmove is None:
            dirmove = {}
        if movewithdir is None:
            movewithdir = {}
        self.copy = copy
        self.renamedelete = renamedelete
        self.dirmove = dirmove
        self.movewithdir = movewithdir

    def __repr__(self):
        # NOTE(review): runs of spaces inside this literal may have been
        # collapsed by the rendering this text was recovered from -- confirm
        # against the repository before relying on the exact layout.
        template = '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>'
        fields = (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
        return template % fields

793

796

794

797

795

def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns (branch_copies for the c1 side, branch_copies for the c2 side,
    diverge), where diverge maps a source name to the sorted list of its
    divergent rename destinations.
    """
    # NOTE(review): runs of spaces inside the debug message literals below
    # may have been collapsed by the rendering this text was recovered from
    # -- confirm against the repository before relying on exact layout.
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return branch_copies(), branch_copies(), {}

    # invert dst -> src into src -> [dst, ...] for each side
    inversecopies1 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    inversecopies2 = {}
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1, copy2 = {}, {}
    renamedelete1, renamedelete2 = {}, {}
    diverge = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            kept1 = src in m1
            kept2 = src in m2
            if kept1 != kept2:
                # TODO: Handle cases where it was renamed on one side and
                # copied on the other side
                continue
            targets1 = set(dsts1)
            targets2 = set(dsts2)
            shared = targets1 & targets2
            if shared:
                # Destinations common to both sides are plain copies, whether
                # the source was copied or renamed on both sides.
                #
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                for dst in shared:
                    copy1[dst] = src
                    copy2[dst] = src
            elif not kept1:
                # renamed on both sides, with no destination in common
                diverge[src] = sorted(targets1 | targets2)
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    for label, unmatched in ((b'local', u1), (b'other', u2)):
        if unmatched:
            repo.ui.debug(
                b"%s:\n %s\n" % (header % label, b"\n ".join(unmatched))
            )

    if repo.ui.debugflag:
        # flatten the destination lists so each file can be annotated below
        divergeset = set().union(*diverge.values())
        renamedeleteset = set().union(*renamedelete1.values())
        renamedeleteset = renamedeleteset.union(*renamedelete2.values())

        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b" on %s side:\n" % side)
            for f in sorted(copies):
                markers = (
                    (b"*", f in copy1 or f in copy2),
                    (b"!", f in divergeset),
                    (b"%", f in renamedeleteset),
                )
                note = b"".join(mark for mark, hit in markers if hit)
                repo.ui.debug(
                    b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    # files added on one side may need to follow a directory moved by the
    # other side, hence the crossed u2/u1 arguments
    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)

    return (
        branch_copies(copy1, renamedelete1, dirmove1, movewithdir1),
        branch_copies(copy2, renamedelete2, dirmove2, movewithdir2),
        diverge,
    )

918

921

919

922

920

def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
    """Detect moved directories and the added files that should follow them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
              merge.manifestmerge() won't care about
    addedfiles: added files on the other side (compared to ctx)

    Returns a ``(dirmove, movewithdir)`` pair of dicts.  ``dirmove`` maps a
    source directory to its destination directory, both with a trailing
    slash.  ``movewithdir`` maps an added file located under a moved source
    directory to the path it would have under the destination directory.
    Both dicts are empty when no directory move is detected.
    """
    known_dirs = ctx.dirs()
    rejected = set()
    candidates = {}

    # Every recorded copy is a hint that its source directory was moved to
    # the destination directory.  The hint is discarded as soon as it is
    # contradicted.
    for dst, src in pycompat.iteritems(fullcopy):
        src_dir = pathutil.dirname(src)
        dst_dir = pathutil.dirname(dst)
        if src_dir in rejected:
            # this source directory was already ruled out
            continue
        if (src_dir in known_dirs and dst_dir in known_dirs) or (
            src_dir in candidates and candidates[src_dir] != dst_dir
        ):
            # Either both directories still exist in ctx (the directory was
            # not entirely moved), or files of one directory went to two
            # different destinations.
            rejected.add(src_dir)
        else:
            # nothing contradicts the move so far
            candidates[src_dir] = dst_dir

    # A directory may have been recorded before it got rejected.
    for bad_dir in rejected:
        candidates.pop(bad_dir, None)
    del known_dirs, rejected

    if not candidates:
        return {}, {}

    # Append a trailing slash so the prefix test below only matches whole
    # path components.
    dirmove = {}
    for src_dir, dst_dir in pycompat.iteritems(candidates):
        dirmove[src_dir + b"/"] = dst_dir + b"/"

    for src_dir, dst_dir in pycompat.iteritems(dirmove):
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (src_dir, dst_dir)
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for added in addedfiles:
        if added in fullcopy:
            continue
        for src_dir, dst_dir in pycompat.iteritems(dirmove):
            if not added.startswith(src_dir):
                continue
            # new file added in a directory that was moved, move it
            moved = dst_dir + added[len(src_dir) :]
            if moved not in copy:
                movewithdir[added] = moved
                repo.ui.debug(
                    b" pending file src: '%s' -> dst: '%s'\n" % (added, moved)
                )
            # only the first matching directory is considered
            break

    return dirmove, movewithdir

983

986

984

987

985

def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
                    (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If a merge is involved, or if base is not an ancestor of c2, it falls
    back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns a 3-tuple. On the heuristic path this is
    ``(branch_copies(copies1), branch_copies(copies2), {})``; on fallback it
    is whatever _fullcopytracing() returns.
    """

    # A context without a revision number is replaced by its first parent.
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # Walk first parents from c2 down to base, collecting every touched file.
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # Keep only the copies between base and c2 whose source exists in c1.
    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = [
        f for f in changedfiles if f not in m1 and f in base and f in c2
    ]

    copies1 = {}
    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        # Index the files that c1 has and base lacks by basename and by
        # directory; these are the possible rename destinations.
        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (the value does not depend on the file, so read it only once)
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            # (only built once we know the file is not skipped)
            f2 = c2.filectx(f)

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}

1092

1095

1093

1096

1094

def _related(f1, f2):

1097

def _related(f1, f2):

1095

"""return True if f1 and f2 filectx have a common ancestor

1098

"""return True if f1 and f2 filectx have a common ancestor

1096

1099

1097

Walk back to common ancestor to see if the two files originate

1100

Walk back to common ancestor to see if the two files originate

1098

from the same file. Since workingfilectx's rev() is None it messes

1101

from the same file. Since workingfilectx's rev() is None it messes

1099

up the integer comparison logic, hence the pre-step check for

1102

up the integer comparison logic, hence the pre-step check for

1100

None (f1 and f2 can only be workingfilectx's initially).

1103

None (f1 and f2 can only be workingfilectx's initially).

1101

"""

1104

"""

1102

1105

1103

if f1 == f2:

1106

if f1 == f2:

1104

return True # a match

1107

return True # a match

1105

1108

1106

g1, g2 = f1.ancestors(), f2.ancestors()

1109

g1, g2 = f1.ancestors(), f2.ancestors()

1107

try:

1110

try:

1108

f1r, f2r = f1.linkrev(), f2.linkrev()

1111

f1r, f2r = f1.linkrev(), f2.linkrev()

1109

1112

1110

if f1r is None:

1113

if f1r is None:

1111

f1 = next(g1)

1114

f1 = next(g1)

1112

if f2r is None:

1115

if f2r is None:

1113

f2 = next(g2)

1116

f2 = next(g2)

1114

1117

1115

while True:

1118

while True:

1116

f1r, f2r = f1.linkrev(), f2.linkrev()

1119

f1r, f2r = f1.linkrev(), f2.linkrev()

1117

if f1r > f2r:

1120

if f1r > f2r:

1118

f1 = next(g1)

1121

f1 = next(g1)

1119

elif f2r > f1r:

1122

elif f2r > f1r:

1120

f2 = next(g2)

1123

f2 = next(g2)

1121

else: # f1 and f2 point to files in the same linkrev

1124

else: # f1 and f2 point to files in the same linkrev

1122

return f1 == f2 # true if they point to the same file

1125

return f1 == f2 # true if they point to the same file

1123

except StopIteration:

1126

except StopIteration:

1124

return False

1127

return False

1125

1128

1126

1129

1127

def graftcopies(wctx, ctx, base):

1130

def graftcopies(wctx, ctx, base):

1128

"""reproduce copies between base and ctx in the wctx

1131

"""reproduce copies between base and ctx in the wctx

1129

1132

1130

Unlike mergecopies(), this function will only consider copies between base

1133

Unlike mergecopies(), this function will only consider copies between base

1131

and ctx; it will ignore copies between base and wctx. Also unlike

1134

and ctx; it will ignore copies between base and wctx. Also unlike

1132

mergecopies(), this function will apply copies to the working copy (instead

1135

mergecopies(), this function will apply copies to the working copy (instead

1133

of just returning information about the copies). That makes it cheaper

1136

of just returning information about the copies). That makes it cheaper

1134

(especially in the common case of base==ctx.p1()) and useful also when

1137

(especially in the common case of base==ctx.p1()) and useful also when

1135

experimental.copytrace=off.

1138

experimental.copytrace=off.

1136

1139

1137

merge.update() will have already marked most copies, but it will only

1140

merge.update() will have already marked most copies, but it will only

1138

mark copies if it thinks the source files are related (see

1141

mark copies if it thinks the source files are related (see

1139

merge._related()). It will also not mark copies if the file wasn't modified

1142

merge._related()). It will also not mark copies if the file wasn't modified

1140

on the local side. This function adds the copies that were "missed"

1143

on the local side. This function adds the copies that were "missed"

1141

by merge.update().

1144

by merge.update().

1142

"""

1145

"""

1143

new_copies = pathcopies(base, ctx)

1146

new_copies = pathcopies(base, ctx)

1144

_filter(wctx.p1(), wctx, new_copies)

1147

_filter(wctx.p1(), wctx, new_copies)

1145

for dst, src in pycompat.iteritems(new_copies):

1148

for dst, src in pycompat.iteritems(new_copies):

1146

wctx[dst].markcopied(src)

1149

wctx[dst].markcopied(src)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import collections
             import os
             from .i18n import _
             from . import (
                 match as matchmod,
                 node,
                 pathutil,
                 policy,
                 pycompat,
                 util,
             )
             from .utils import stringutil
             from .revlogutils import flagutil
             rustmod = policy.importrust("copy_tracing")
             def _filter(src, dst, t):
                 """filters out invalid copies after chaining"""
                 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
                 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
                 # in the following table (not including trivial cases). For example, case 2
                 # is where a file existed in 'src' and remained under that name in 'mid' and
                 # then was renamed between 'mid' and 'dst'.
                 #
                 # case src mid dst result
                 #   1   x   y   -    -
                 #   2   x   y   y   x->y
                 #   3   x   y   x    -
                 #   4   x   y   z   x->z
                 #   5   -   x   y    -
                 #   6   x   x   y   x->y
                 #
                 # _chain() takes care of chaining the copies in 'a' and 'b', but it
                 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
                 # between 5 and 6, so it includes all cases in its result.
                 # Cases 1, 3, and 5 are then removed by _filter().
                 for k, v in list(t.items()):
                     # remove copies from files that didn't exist
                     if v not in src:
                         del t[k]
                     # remove criss-crossed copies
                     elif k in src and v in dst:
                         del t[k]
                     # remove copies to files that were then removed
                     elif k not in dst:
                         del t[k]
             def _chain(prefix, suffix):
                 """compose the copy mappings 'prefix' and 'suffix' into a new mapping

                 All entries of 'prefix' are kept. Every destination of 'suffix' is then
                 recorded against the source 'prefix' knows for its own source, or
                 against that source unchanged when 'prefix' has no entry for it.
                 Neither argument is modified.
                 """
                 chained = prefix.copy()
                 chained.update(
                     (dest, prefix.get(source, source))
                     for dest, source in pycompat.iteritems(suffix)
                 )
                 return chained
             def _tracefile(fctx, am, basemf):
                 """return file context that is the ancestor of fctx present in ancestor
                 manifest am
                 Note: we used to try and stop after a given limit, however checking if that
                 limit is reached turned out to be very expensive. we are better off
                 disabling that feature."""
                 for f in fctx.ancestors():
                     path = f.path()
                     if am.get(path, None) == f.filenode():
                         return path
                     if basemf and basemf.get(path, None) == f.filenode():
                         return path
             def _dirstatecopies(repo, match=None):
                 ds = repo.dirstate
                 c = ds.copies().copy()
                 for k in list(c):
                     if ds[k] not in b'anm' or (match and not match(k)):
                         del c[k]
                 return c
             def _computeforwardmissing(a, b, match=None):
                 """Computes which files are in b but not a.
                 This is its own function so extensions can easily wrap this call to see what
                 files _forwardcopies is about to process.
                 """
                 ma = a.manifest()
                 mb = b.manifest()
                 return mb.filesnotin(ma, match=match)
             def usechangesetcentricalgo(repo):
                 """Tell whether the changeset-centric copy algorithms should be used

                 That is the case when the repository's filecopiesmode is
                 b'changeset-sidedata', or when the experimental.copies.read-from config
                 is set to b'changeset-only' or b'compatibility'.
                 """
                 if repo.filecopiesmode == b'changeset-sidedata':
                     return True
                 source = repo.ui.config(b'experimental', b'copies.read-from')
                 return source == b'changeset-only' or source == b'compatibility'
             def _committedforwardcopies(a, b, base, match):
                 """Like _forwardcopies(), but b.rev() cannot be None (working copy)

                 Returns a {file in b: path it was traced back to} dict. When
                 usechangesetcentricalgo() answers True the whole computation is handed
                 over to _changesetforwardcopies() and 'base' is not looked at.
                 """
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 repo = a._repo
                 if usechangesetcentricalgo(repo):
                     return _changesetforwardcopies(a, b, match)

                 ui = repo.ui
                 debug = ui.debugflag and ui.configbool(b'devel', b'debug.copies')
                 dbg = ui.debug
                 if debug:
                     dbg(b'debug.copies:    looking into rename from %s to %s\n' % (a, b))

                 ancestor_manifest = a.manifest()
                 base_manifest = base.manifest() if base is not None else None

                 # Comparing two whole manifests to learn which files are missing is
                 # costly on large manifests. In the common situation where we want the
                 # copies of a commit relative to its parent (rebase, histedit, ...), the
                 # comparison can be restricted to the files the commit touched. Merge
                 # commits are left out of this shortcut because their ctx.files() is
                 # not correct for this comparison.
                 forwardmissingmatch = match
                 if b.p1() == a and b.p2().node() == node.nullid:
                     touched = matchmod.exact(b.files())
                     forwardmissingmatch = matchmod.intersectmatchers(match, touched)
                 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

                 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

                 if debug:
                     dbg(b'debug.copies:      missing files to search: %d\n' % len(missing))

                 # find where new files came from
                 # we currently don't try to find where old files went, too expensive
                 # this means we can miss a case like 'hg rm b; hg cp a b'
                 found = {}
                 for name in sorted(missing):
                     if debug:
                         dbg(b'debug.copies:        tracing file: %s\n' % name)
                     fctx = b[name]
                     fctx._ancestrycontext = ancestrycontext

                     if debug:
                         start = util.timer()
                     origin = _tracefile(fctx, ancestor_manifest, base_manifest)
                     if origin:
                         if debug:
                             dbg(b'debug.copies:          rename of: %s\n' % origin)
                         found[name] = origin
                     if debug:
                         elapsed = util.timer() - start
                         dbg(b'debug.copies:          time: %f seconds\n' % elapsed)
                 return found
             def _revinfo_getter(repo):
                 """returns a function that returns the following data given a <rev>

                 * p1: revision number of first parent
                 * p2: revision number of second parent
                 * changes: a ChangingFiles object, or None when the revision is not
                            flagged with REVIDX_HASCOPIESINFO
                 """
                 cl = repo.changelog
                 parents = cl.parentrevs
                 flags = cl.flags

                 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

                 changelogrevision = cl.changelogrevision

                 # A small cache to avoid doing the work twice for merges
                 #
                 # In the vast majority of cases, if we ask information for a revision
                 # about 1 parent, we'll later ask it for the other. So it makes sense to
                 # keep the information around when reaching the first parent of a merge
                 # and to drop it after it was provided for the second parent.
                 #
                 # There are cases where only one parent of the merge will be walked. It
                 # happens when the "destination" of the copy tracing is descendant from
                 # a new root, not common with the "source". In that case, we will only
                 # walk through merge parents that are descendant of changesets common
                 # between "source" and "destination".
                 #
                 # With the current implementation, if such changesets have copy
                 # information, we'll keep them in memory until the end of
                 # _changesetforwardcopies. We don't expect the case to be frequent
                 # enough to matter.
                 #
                 # In addition, it would be possible to reach a pathological case, where
                 # many first parents are met before any second parent is reached. In
                 # that case the cache could grow. If this ever becomes an issue one can
                 # safely introduce a maximum cache size. This would trade extra CPU/IO
                 # time to save memory.
                 merge_caches = {}

                 def revinfo(rev):
                     # a hit is consumed (popped): the entry was stored when the merge
                     # was first asked about, this is the second request
                     cached = merge_caches.pop(rev, None)
                     if cached is not None:
                         return cached
                     p1, p2 = parents(rev)
                     changes = None
                     if flags(rev) & HASCOPIESINFO:
                         changes = changelogrevision(rev).changes
                     value = (p1, p2, changes)
                     if p1 != node.nullrev and p2 != node.nullrev:
                         # XXX some case we over cache, IGNORE
                         merge_caches[rev] = value
                     return value

                 return revinfo
             def cached_is_ancestor(is_ancestor):
                 """wrap 'is_ancestor' into a function remembering its answers

                 The wrapper answers by itself, without calling 'is_ancestor', when
                 'anc' is greater than 'desc' (False) or equal to it (True). Any other
                 answer is stored per (anc, desc) pair; a stored answer that is not None
                 is reused on later calls.
                 """
                 known = {}

                 def _is_ancestor(anc, desc):
                     if anc > desc:
                         return False
                     if anc == desc:
                         return True
                     pair = (anc, desc)
                     answer = known.get(pair)
                     if answer is not None:
                         return answer
                     answer = is_ancestor(anc, desc)
                     known[pair] = answer
                     return answer

                 return _is_ancestor
             def _changesetforwardcopies(a, b, match):
                 """compute the copies from a to b out of changeset-level copy data

                 The revisions to walk and their {parent: [children]} relation are
                 gathered here; combining the per-revision data is left to
                 _combine_changeset_copies() when the repository's filecopiesmode is
                 b'changeset-sidedata', and to _combine_changeset_copies_extra()
                 otherwise. An empty dict is returned when 'a' is the null revision or
                 'b' itself, or when no root to start from is found.
                 """
                 if a.rev() in (node.nullrev, b.rev()):
                     return {}

                 repo = a.repo().unfiltered()
                 cl = repo.changelog
                 target = b.rev()

                 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[target])
                 mrset = set(missingrevs)

                 children = {}
                 roots = set()
                 for rev in missingrevs:
                     for parent in cl.parentrevs(rev):
                         if parent == node.nullrev:
                             continue
                         children.setdefault(parent, []).append(rev)
                         if parent not in mrset:
                             # a parent outside of the missing set is a starting point
                             roots.add(parent)
                 if not roots:
                     # no common revision to track copies from
                     return {}

                 from_head = cl.reachableroots(
                     min(roots), [target], list(roots), includepath=True
                 )
                 iterrevs = (set(from_head) & mrset) | roots
                 iterrevs.remove(target)
                 revs = sorted(iterrevs)

                 if repo.filecopiesmode == b'changeset-sidedata':
                     revinfo = _revinfo_getter(repo)
                     combine = _combine_changeset_copies
                 else:
                     revinfo = _revinfo_getter_extra(repo)
                     combine = _combine_changeset_copies_extra
                 return combine(revs, children, target, revinfo, match, cl.isancestorrev)
             def _combine_changeset_copies(
                 revs, children, targetrev, revinfo, match, isancestor
             ):
                 """combine the copies information for each item of revs

                 revs: sorted iterable of revision to visit
                 children: a {parent: [children]} mapping.
                 targetrev: the final copies destination revision (not in revs)
                 revinfo(rev): a function that return (p1, p2, changes), where `changes`
                               is None or an object providing `copied_from_p1`,
                               `copied_from_p2` and `removed`
                 match: a matcher
                 isancestor(low_rev, high_rev): callable telling whether `low_rev` is an
                                                ancestor of `high_rev`

                 It returns the aggregated copies information for `targetrev`, as a
                 {destination: source} dict.
                 """
                 alwaysmatch = match.always()

                 if rustmod is not None and alwaysmatch:
                     return rustmod.combine_changeset_copies(
                         list(revs), children, targetrev, revinfo, isancestor
                     )

                 isancestor = cached_is_ancestor(isancestor)

                 # {rev: {dest: (rev that recorded the entry, source or None)}}
                 all_copies = {}
                 for r in revs:
                     copies = all_copies.pop(r, None)
                     if copies is None:
                         # this is a root
                         copies = {}
                     for c in children[r]:
                         p1, p2, changes = revinfo(c)
                         childcopies = {}
                         if r == p1:
                             parent = 1
                             if changes is not None:
                                 childcopies = changes.copied_from_p1
                         else:
                             assert r == p2
                             parent = 2
                             if changes is not None:
                                 childcopies = changes.copied_from_p2
                         if not alwaysmatch:
                             childcopies = {
                                 dst: src for dst, src in childcopies.items() if match(dst)
                             }
                         newcopies = copies
                         if childcopies:
                             newcopies = copies.copy()
                             for dest, source in pycompat.iteritems(childcopies):
                                 prev = copies.get(source)
                                 if prev is not None and prev[1] is not None:
                                     # chain through the copy already known for the
                                     # source
                                     source = prev[1]
                                 newcopies[dest] = (c, source)
                             assert newcopies is not copies
                         if changes is not None and changes.removed:
                             # work on a private dict so that the entries overwritten
                             # below do not affect the other children of `r`
                             if newcopies is copies:
                                 newcopies = copies.copy()
                             for f in changes.removed:
                                 if f in newcopies:
                                     # a None source marks the copy as gone
                                     newcopies[f] = (c, None)
                         othercopies = all_copies.get(c)
                         if othercopies is None:
                             all_copies[c] = newcopies
                         elif newcopies is othercopies:
                             # nothing to merge:
                             pass
                         else:
                             # we are the second parent to work on c, we need to merge our
                             # work with the other.
                             #
                             # In case of conflict, parent 1 take precedence over parent 2.
                             # This is an arbitrary choice made anew when implementing
                             # changeset based copies. It was made without regards with
                             # potential filelog related behavior.
                             if parent == 1:
                                 minor, major = othercopies, newcopies
                             else:
                                 minor, major = newcopies, othercopies
                             # `major` is folded into `minor` in place, so `minor` is the
                             # merged mapping. `copies` must not be rebound here: it is
                             # still the starting point for the remaining children of `r`.
                             _merge_copies_dict(minor, major, isancestor, changes)
                             all_copies[c] = minor

                 # entries whose source is None are removal markers, not copies
                 return {
                     dest: source
                     for dest, (tt, source) in all_copies[targetrev].items()
                     if source is not None
                 }
             def _merge_copies_dict(minor, major, isancestor, changes):
                 """merge two copies-mapping together, minor and major
                 In case of conflict, value from "major" will be picked.
                 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
                                                     ancestors of `high_rev`,
                 - `ismerged(path)`: callable return True if `path` have been merged in the
                                     current revision,
+                return the resulting dict (in practice, the "minor" object, updated)
                 """
                 for dest, value in major.items():
                     other = minor.get(dest)
                     if other is None:
                         minor[dest] = value
                     else:
                         new_tt = value[0]
                         other_tt = other[0]
                         if value[1] == other[1]:
                             continue
                         # content from "major" wins, unless it is older
                         # than the branch point or there is a merge
                         if new_tt == other_tt:
                             minor[dest] = value
                         elif (
                             changes is not None
                             and value[1] is None
                             and dest in changes.salvaged
                         ):
                             pass
                         elif (
                             changes is not None
                             and other[1] is None
                             and dest in changes.salvaged
                         ):
                             minor[dest] = value
                         elif changes is not None and dest in changes.merged:
                             minor[dest] = value
                         elif not isancestor(new_tt, other_tt):
                             if value[1] is not None:
                                 minor[dest] = value
                             elif isancestor(other_tt, new_tt):
                                 minor[dest] = value
+                return minor
             def _revinfo_getter_extra(repo):
                 """return a function that return multiple data given a <rev>"i
                 * p1: revision number of first parent
                 * p2: revision number of first parent
                 * p1copies: mapping of copies from p1
                 * p2copies: mapping of copies from p2
                 * removed: a list of removed files
                 * ismerged: a callback to know if file was merged in that revision
                 """
                 cl = repo.changelog
                 parents = cl.parentrevs
                 def get_ismerged(rev):
                     ctx = repo[rev]
                     def ismerged(path):
                         if path not in ctx.files():
                             return False
                         fctx = ctx[path]
                         parents = fctx._filelog.parents(fctx._filenode)
                         nb_parents = 0
                         for n in parents:
                             if n != node.nullid:
                                 nb_parents += 1
                         return nb_parents >= 2
                     return ismerged
                 def revinfo(rev):
                     p1, p2 = parents(rev)
                     ctx = repo[rev]
                     p1copies, p2copies = ctx._copies
                     removed = ctx.filesremoved()
                     return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
                 return revinfo
             def _combine_changeset_copies_extra(
                 revs, children, targetrev, revinfo, match, isancestor
             ):
                 """version of `_combine_changeset_copies` that works with the Google
                 specific "extra" based storage for copy information

                 `revinfo(rev)` must return the 6-tuple
                 (p1, p2, p1copies, p2copies, removed, ismerged).

                 Returns a {destination: source} dict for `targetrev`; entries whose
                 recorded source is None are left out."""
                 # {rev: {dest: (rev that recorded the entry, source or None)}}
                 all_copies = {}
                 alwaysmatch = match.always()
                 for r in revs:
                     copies = all_copies.pop(r, None)
                     if copies is None:
                         # this is a root
                         copies = {}
                     for i, c in enumerate(children[r]):
                         p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
                         # pick the copy data of `c` that is relative to `r`
                         if r == p1:
                             parent = 1
                             childcopies = p1copies
                         else:
                             assert r == p2
                             parent = 2
                             childcopies = p2copies
                         if not alwaysmatch:
                             childcopies = {
                                 dst: src for dst, src in childcopies.items() if match(dst)
                             }
                         # `newcopies` stays the very same object as `copies` until
                         # something has to be written into it
                         newcopies = copies
                         if childcopies:
                             newcopies = copies.copy()
                             for dest, source in pycompat.iteritems(childcopies):
                                 prev = copies.get(source)
                                 if prev is not None and prev[1] is not None:
                                     # chain through the copy already known for the
                                     # source
                                     source = prev[1]
                                 newcopies[dest] = (c, source)
                             assert newcopies is not copies
                         for f in removed:
                             if f in newcopies:
                                 if newcopies is copies:
                                     # copy on write to avoid affecting potential other
                                     # branches.  when there are no other branches, this
                                     # could be avoided.
                                     newcopies = copies.copy()
                                 # a None source marks the copy as gone
                                 newcopies[f] = (c, None)
                         othercopies = all_copies.get(c)
                         if othercopies is None:
                             all_copies[c] = newcopies
                         else:
                             # we are the second parent to work on c, we need to merge our
                             # work with the other.
                             #
                             # In case of conflict, parent 1 take precedence over parent 2.
                             # This is an arbitrary choice made anew when implementing
                             # changeset based copies. It was made without regards with
                             # potential filelog related behavior.
                             if parent == 1:
                                 # merged in place into the dict already stored for `c`
                                 _merge_copies_dict_extra(
                                     othercopies, newcopies, isancestor, ismerged
                                 )
                             else:
                                 # NOTE(review): `newcopies` may still be the `copies`
                                 # object here (no child copies, no removal), and the
                                 # merge updates it in place -- confirm this cannot
                                 # leak into the remaining children of `r`.
                                 _merge_copies_dict_extra(
                                     newcopies, othercopies, isancestor, ismerged
                                 )
                                 all_copies[c] = newcopies
                 final_copies = {}
                 for dest, (tt, source) in all_copies[targetrev].items():
                     if source is not None:
                         final_copies[dest] = source
                 return final_copies
             def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
                 """version of `_merge_copies_dict` that works with the Google
                 specific "extra" based storage for copy information"""
                 for dest, value in major.items():
                     other = minor.get(dest)
                     if other is None:
                         minor[dest] = value
                     else:
                         new_tt = value[0]
                         other_tt = other[0]
                         if value[1] == other[1]:
                             continue
                         # content from "major" wins, unless it is older
                         # than the branch point or there is a merge
                         if (
                             new_tt == other_tt
                             or not isancestor(new_tt, other_tt)
                             or ismerged(dest)
                         ):
                             minor[dest] = value
             def _forwardcopies(a, b, base=None, match=None):
                 """find {dst@b: src@a} copy mapping where a is an ancestor of b

                 'base' defaults to 'a'. 'match' is first passed through the
                 repository's narrowmatch(). When 'b' is the working copy (its rev() is
                 None), the copies up to its first parent are chained with the copies
                 recorded in the dirstate.
                 """
                 if base is None:
                     base = a
                 match = a.repo().narrowmatch(match)
                 if b.rev() is not None:
                     return _committedforwardcopies(a, b, base, match)
                 # working copy: committed part first, dirstate records on top
                 committed = _committedforwardcopies(a, b.p1(), base, match)
                 return _chain(committed, _dirstatecopies(b._repo, match))
             def _backwardrenames(a, b, match):
                 """find {src@a's side: dst} renames by reversing the copies from b to a

                 Returns an empty dict when experimental.copytrace is b'off'. Sources
                 that still exist in 'a' are copies rather than renames and are skipped.
                 """
                 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
                     return {}

                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
                 # We don't want to pass in "match" here, since that would filter
                 # the destination by it. Since we're reversing the copies, we want
                 # to filter the source instead.
                 forward = _forwardcopies(b, a)
                 renames = {}
                 for dest, source in sorted(pycompat.iteritems(forward)):
                     wanted = not match or match(source)
                     # a source still present in 'a' was copied, not renamed
                     if wanted and source not in a:
                         renames[source] = dest
                 return renames
             def pathcopies(x, y, match=None):
                 """find {dst@y: src@x} copy mapping for directed compare

                 Depending on how 'x' and 'y' relate, the copies come from the dirstate
                 alone, from a forward search, from a backward search, or from a
                 backward search chained with a forward one through their ancestor. An
                 empty dict is returned when 'x' equals 'y' or when either is falsy.
                 """
                 repo = x._repo
                 ui = repo.ui
                 debug = ui.debugflag and ui.configbool(b'devel', b'debug.copies')

                 def trace(message):
                     if debug:
                         ui.debug(message)

                 if debug:
                     ui.debug(b'debug.copies: searching copies from %s to %s\n' % (x, y))
                 if x == y or not x or not y:
                     return {}
                 if y.rev() is None and x == y.p1():
                     trace(b'debug.copies: search mode: dirstate\n')
                     # short-circuit to avoid issues with merge states
                     return _dirstatecopies(repo, match)

                 a = y.ancestor(x)
                 if a == x:
                     trace(b'debug.copies: search mode: forward\n')
                     copies = _forwardcopies(x, y, match=match)
                 elif a == y:
                     trace(b'debug.copies: search mode: backward\n')
                     copies = _backwardrenames(x, y, match=match)
                 else:
                     trace(b'debug.copies: search mode: combined\n')
                     base = x if a.rev() != node.nullrev else None
                     backward = _backwardrenames(x, a, match=match)
                     forward = _forwardcopies(a, y, base, match=match)
                     copies = _chain(backward, forward)
                 _filter(x, y, copies)
                 return copies
             def mergecopies(repo, c1, c2, base):
                 """Find moves and copies between c1 and c2 that matter for a merge.

                 'base' will be used as the merge base.

                 Copytracing is used in commands like rebase, merge, unshelve, etc to
                 merge files that were moved/copied in one merge parent and modified in
                 another. For example:

                 o          ---> 4 another commit
                 |
                 |   o      ---> 3 commit that modifies a.txt
                 |  /
                 o /        ---> 2 commit that moves a.txt to b.txt
                 |/
                 o          ---> 1 merge base

                 If we try to rebase revision 3 on revision 4, since there is no a.txt
                 in revision 4, and if the user has copytrace disabled, we print the
                 following message:

                 ```other changed <file> which local deleted```

                 Returns a 3-tuple:

                 - a branch_copies instance for the c1 side,
                 - a branch_copies instance for the c2 side,
                 - "diverge", a mapping of source name -> list of destination names
                   for divergent renames.

                 This function calls different copytracing algorithms based on config.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return branch_copies(), branch_copies(), {}

                 narrowmatch = c1.repo().narrowmatch()

                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     dirstate_copies = _dirstatecopies(repo, narrowmatch)
                     return branch_copies(dirstate_copies), branch_copies(), {}

                 # Copy trace disabling is explicitly below the node == p1 logic above
                 # because the logic above is required for a simple copy to be kept
                 # across a rebase.
                 copytracing = repo.ui.config(b'experimental', b'copytrace')
                 if stringutil.parsebool(copytracing) is False:
                     # stringutil.parsebool() returns None when it is unable to parse
                     # the value, so only an explicit false value turns tracing off
                     return branch_copies(), branch_copies(), {}

                 # The heuristics don't make sense when we need changeset-centric algos
                 if usechangesetcentricalgo(repo):
                     return _fullcopytracing(repo, c1, c2, base)

                 # Even in heuristics mode, do full copytracing if only non-public
                 # revisions are involved: that will be fast enough and will also
                 # cover the copies which could be missed by heuristics
                 wants_heuristics = copytracing == b'heuristics'
                 if wants_heuristics and not _isfullcopytraceable(repo, c1, base):
                     return _heuristicscopytracing(repo, c1, c2, base)
                 return _fullcopytracing(repo, c1, c2, base)
             def _isfullcopytraceable(repo, c1, base):
                 """Tell whether the full copytrace algorithm is cheap enough to use.

                 That is the case when both the source (c1, or its first parent when
                 c1 has no revision) and base are mutable, i.e. non-public, and the
                 number of changesets from base to c1 stays below the limit below.

                 `experimental.copytrace.sourcecommitlimit` can be used to set a limit
                 for number of changesets from c1 to base such that if number of
                 changesets are more than the limit, full copytracing algorithm won't
                 be used.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if not (c1.mutable() and base.mutable()):
                     return False
                 limit = repo.ui.configint(
                     b'experimental', b'copytrace.sourcecommitlimit'
                 )
                 span = repo.revs(b'%d::%d', base.rev(), c1.rev())
                 return len(span) < limit
             def _checksinglesidecopies(
                 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
             ):
                 """Classify a source that was copied/renamed on one side only.

                 src: the copy source
                 dsts1: destinations src was copied to on the side that made the copy
                 m1, m2, mb: manifests of the copying side, the other side and the base
                 c2, base: contexts of the other side and of the merge base
                 copy, renamedelete: dicts updated in place; nothing is returned

                 'renamedelete' gets src -> dsts1 when src is gone from both sides.
                 'copy' gets dst -> src for every destination when the other side
                 changed src (content or flags) in a way related to the base version.
                 """
                 if src not in m2:
                     # deleted on side 2
                     if src not in m1:
                         # renamed on side 1, deleted on side 2
                         renamedelete[src] = dsts1
                     return

                 if src not in mb:
                     # Work around the "short-circuit to avoid issues with merge states"
                     # thing in pathcopies(): pathcopies(x, y) can return a copy where
                     # the destination doesn't exist in y.
                     return

                 content_changed = mb[src] != m2[src]
                 if content_changed and not _related(c2[src], base[src]):
                     return

                 if content_changed or mb.flags(src) != m2.flags(src):
                     # modified on side 2
                     copy.update((dst, src) for dst in dsts1)
             class branch_copies(object):
                 """Copy information gathered for one side of a merge/graft.

                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.

                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.

                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.

                 "dirmove" is a mapping of detected source dir -> destination dir
                 renames. This is needed for handling changes to new files previously
                 grafted into renamed directories.
                 """

                 def __init__(
                     self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
                 ):
                     # every mapping left as None gets its own fresh dict; a mapping
                     # that is passed in is stored as-is, not copied
                     self.copy = copy if copy is not None else {}
                     self.renamedelete = (
                         renamedelete if renamedelete is not None else {}
                     )
                     self.dirmove = dirmove if dirmove is not None else {}
                     self.movewithdir = movewithdir if movewithdir is not None else {}

                 def __repr__(self):
                     template = (
                         '<branch_copies\n'
                         '  copy=%r\n'
                         '  renamedelete=%r\n'
                         '  dirmove=%r\n'
                         '  movewithdir=%r\n'
                         '>'
                     )
                     fields = (
                         self.copy,
                         self.renamedelete,
                         self.dirmove,
                         self.movewithdir,
                     )
                     return template % fields
             def _fullcopytracing(repo, c1, c2, base):
                 """The full copytracing algorithm which finds all the new files that were
                 added from merge base up to the top commit and for each file it checks if
                 this file was copied from another file.

                 This is pretty slow when a lot of changesets are involved but will track all
                 the copies.

                 Returns a 3-tuple: a branch_copies instance for the c1 side, a
                 branch_copies instance for the c2 side, and "diverge", a mapping of
                 source name -> sorted list of destination names for sources that were
                 renamed on both sides to non-overlapping destinations.
                 """
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 mb = base.manifest()
                 # {dst: src} copies made between the merge base and each side
                 copies1 = pathcopies(base, c1)
                 copies2 = pathcopies(base, c2)
                 # nothing was copied on either side, so there is nothing to classify
                 if not (copies1 or copies2):
                     return branch_copies(), branch_copies(), {}
                 # invert to {src: [dst, ...]} so each source is classified exactly once
                 inversecopies1 = {}
                 inversecopies2 = {}
                 for dst, src in copies1.items():
                     inversecopies1.setdefault(src, []).append(dst)
                 for dst, src in copies2.items():
                     inversecopies2.setdefault(src, []).append(dst)
                 copy1 = {}
                 copy2 = {}
                 diverge = {}
                 renamedelete1 = {}
                 renamedelete2 = {}
                 allsources = set(inversecopies1) | set(inversecopies2)
                 for src in allsources:
                     # None when the source was not copied on that side
                     dsts1 = inversecopies1.get(src)
                     dsts2 = inversecopies2.get(src)
                     if dsts1 and dsts2:
                         # copied/renamed on both sides
                         if src not in m1 and src not in m2:
                             # renamed on both sides
                             dsts1 = set(dsts1)
                             dsts2 = set(dsts2)
                             # If there's some overlap in the rename destinations, we
                             # consider it not divergent. For example, if side 1 copies 'a'
                             # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                             # and 'd' and deletes 'a'.
                             if dsts1 & dsts2:
                                 for dst in dsts1 & dsts2:
                                     copy1[dst] = src
                                     copy2[dst] = src
                             else:
                                 diverge[src] = sorted(dsts1 | dsts2)
                         elif src in m1 and src in m2:
                             # copied on both sides
                             dsts1 = set(dsts1)
                             dsts2 = set(dsts2)
                             # only destinations chosen by both sides are recorded
                             for dst in dsts1 & dsts2:
                                 copy1[dst] = src
                                 copy2[dst] = src
                         # TODO: Handle cases where it was renamed on one side and copied
                         # on the other side
                     elif dsts1:
                         # copied/renamed only on side 1
                         _checksinglesidecopies(
                             src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
                         )
                     elif dsts2:
                         # copied/renamed only on side 2 (same check with the sides
                         # swapped)
                         _checksinglesidecopies(
                             src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
                         )
                 # find interesting file sets from manifests
                 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
                 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
                 # u1/u2: files reported by filesnotin() for only one of the two sides
                 u1 = sorted(addedinm1 - addedinm2)
                 u2 = sorted(addedinm2 - addedinm1)
                 header = b"  unmatched files in %s"
                 if u1:
                     repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
                 if u2:
                     repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))
                 # the sets below only feed the debug listing, so they are built only
                 # when debug output is enabled
                 if repo.ui.debugflag:
                     renamedeleteset = set()
                     divergeset = set()
                     for dsts in diverge.values():
                         divergeset.update(dsts)
                     for dsts in renamedelete1.values():
                         renamedeleteset.update(dsts)
                     for dsts in renamedelete2.values():
                         renamedeleteset.update(dsts)
                     repo.ui.debug(
                         b"  all copies found (* = to merge, ! = divergent, "
                         b"% = renamed and deleted):\n"
                     )
                     for side, copies in ((b"local", copies1), (b"remote", copies2)):
                         if not copies:
                             continue
                         repo.ui.debug(b"   on %s side:\n" % side)
                         for f in sorted(copies):
                             # a destination may carry several markers at once
                             note = b""
                             if f in copy1 or f in copy2:
                                 note += b"*"
                             if f in divergeset:
                                 note += b"!"
                             if f in renamedeleteset:
                                 note += b"%"
                             repo.ui.debug(
                                 b"    src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                             )
                     del renamedeleteset
                     del divergeset
                 repo.ui.debug(b"  checking for directory renames\n")
                 # directory moves detected on one side yield "movewithdir" entries
                 # for the files added on the other side, hence the crossed indices
                 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
                 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)
                 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
                 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
                 return branch_copies1, branch_copies2, diverge
             def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
                 """Detect moved directories and the files that should follow them.

                 ctx: the context for one of the sides
                 copy: files copied on the same side (as ctx)
                 fullcopy: files copied on the same side (as ctx), including those that
                           merge.manifestmerge() won't care about
                 addedfiles: added files on the other side (compared to ctx)

                 Returns (dirmove, movewithdir): 'dirmove' maps source directory ->
                 destination directory (both with a trailing slash), 'movewithdir' maps
                 an added file -> the path it should get inside the moved directory.
                 Both are empty dicts when no directory move was detected.
                 """
                 # generate a directory move map
                 known_dirs = ctx.dirs()
                 rejected = set()
                 candidates = {}

                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in pycompat.iteritems(fullcopy):
                     srcdir = pathutil.dirname(src)
                     dstdir = pathutil.dirname(dst)
                     if srcdir in rejected:
                         # already seen to be uninteresting
                         continue
                     # directory wasn't entirely moved locally
                     still_present = srcdir in known_dirs and dstdir in known_dirs
                     # files from the same directory moved to two different places
                     scattered = candidates.get(srcdir, dstdir) != dstdir
                     if still_present or scattered:
                         rejected.add(srcdir)
                     else:
                         # looks good so far
                         candidates[srcdir] = dstdir

                 # a directory may have been recorded before it was ruled out
                 for srcdir in rejected:
                     candidates.pop(srcdir, None)

                 if not candidates:
                     return {}, {}

                 dirmove = {
                     srcdir + b"/": dstdir + b"/"
                     for srcdir, dstdir in pycompat.iteritems(candidates)
                 }
                 for srcdir, dstdir in pycompat.iteritems(dirmove):
                     repo.ui.debug(
                         b"   discovered dir src: '%s' -> dst: '%s'\n" % (srcdir, dstdir)
                     )

                 movewithdir = {}
                 # check unaccounted nonoverlapping files against directory moves
                 for f in addedfiles:
                     if f in fullcopy:
                         continue
                     for srcdir in dirmove:
                         if not f.startswith(srcdir):
                             continue
                         # new file added in a directory that was moved, move it
                         moved = dirmove[srcdir] + f[len(srcdir) :]
                         if moved not in copy:
                             movewithdir[f] = moved
                             repo.ui.debug(
                                 b"   pending file src: '%s' -> dst: '%s'\n"
                                 % (f, moved)
                             )
                         # only the first matching directory is considered
                         break
                 return dirmove, movewithdir
             def _heuristicscopytracing(repo, c1, c2, base):
                 """Fast copytracing using filename heuristics

                 Assumes that moves or renames are of following two types:

                 1) Inside a directory only (same directory name but different filenames)
                 2) Move from one directory to another
                                 (same filenames but different directory names)

                 Works only when there are no merge commits in the "source branch".
                 Source branch is commits from base up to c2 not including base.

                 If merge is involved it fallbacks to _fullcopytracing().

                 Can be used by setting the following config:

                     [experimental]
                     copytrace = heuristics

                 In some cases the copy/move candidates found by heuristics can be very large
                 in number and that will make the algorithm slow. The number of possible
                 candidates to check can be limited by using the config
                 `experimental.copytrace.movecandidateslimit` which defaults to 100.

                 Returns a 3-tuple: a branch_copies instance for the c1 side, a
                 branch_copies instance for the c2 side, and an empty dict (this
                 algorithm does not report divergent renames). When it falls back to
                 _fullcopytracing(), that function's result is returned instead.
                 """
                 # the working copy has no revision; use its first parent instead
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c2.rev() is None:
                     c2 = c2.p1()

                 changedfiles = set()
                 m1 = c1.manifest()
                 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
                     # If base is not in c2 branch, we switch to fullcopytracing
                     repo.ui.debug(
                         b"switching to full copytracing as base is not "
                         b"an ancestor of c2\n"
                     )
                     return _fullcopytracing(repo, c1, c2, base)

                 # walk first parents from c2 down to base, collecting touched files
                 ctx = c2
                 while ctx != base:
                     if len(ctx.parents()) == 2:
                         # To keep things simple let's not handle merges
                         repo.ui.debug(b"switching to full copytracing because of merges\n")
                         return _fullcopytracing(repo, c1, c2, base)
                     changedfiles.update(ctx.files())
                     ctx = ctx.p1()

                 # copies made on the c2 side whose source still exists in c1
                 copies2 = {
                     dst: src
                     for dst, src in pycompat.iteritems(_forwardcopies(base, c2))
                     if src in m1
                 }

                 # file is missing if it isn't present in the destination, but is present in
                 # the base and present in the source.
                 # Presence in the base is important to exclude added files, presence in the
                 # source is important to exclude removed files.
                 missingfiles = [
                     f for f in changedfiles if f not in m1 and f in base and f in c2
                 ]

                 copies1 = {}
                 if missingfiles:
                     # we can have a lot of candidates which can slow down the heuristics
                     # config value to limit the number of candidates moves to check;
                     # it does not change while we loop, so read it only once
                     maxcandidates = repo.ui.configint(
                         b'experimental', b'copytrace.movecandidateslimit'
                     )

                     # index the files added in c1 by basename and by directory
                     basenametofilename = collections.defaultdict(list)
                     dirnametofilename = collections.defaultdict(list)
                     for f in m1.filesnotin(base.manifest()):
                         basenametofilename[os.path.basename(f)].append(f)
                         dirnametofilename[os.path.dirname(f)].append(f)

                     for f in missingfiles:
                         samebasename = basenametofilename[os.path.basename(f)]
                         samedirname = dirnametofilename[os.path.dirname(f)]
                         movecandidates = samebasename + samedirname
                         if len(movecandidates) > maxcandidates:
                             repo.ui.status(
                                 _(
                                     b"skipping copytracing for '%s', more "
                                     b"candidates than the limit: %d\n"
                                 )
                                 % (f, len(movecandidates))
                             )
                             continue
                         # f is guaranteed to be present in c2, that's why
                         # c2.filectx(f) won't fail; it is only needed for files
                         # that were not skipped above
                         f2 = c2.filectx(f)
                         for candidate in movecandidates:
                             f1 = c1.filectx(candidate)
                             if _related(f1, f2):
                                 # if there are a few related copies then we'll merge
                                 # changes into all of them. This matches the behaviour
                                 # of upstream copytracing
                                 copies1[candidate] = f
                 return branch_copies(copies1), branch_copies(copies2), {}
             def _related(f1, f2):
                 """return True if f1 and f2 filectx have a common ancestor

                 Both histories are walked back in lock step, always advancing the one
                 with the higher linkrev, until both sit on the same linkrev; the files
                 are related if they are equal at that point. Running out of ancestors
                 on either side means no common ancestor was found.

                 A linkrev() of None (f1 and f2 can only be workingfilectx's initially)
                 would mess up the integer comparison logic, hence the pre-step that
                 replaces such a filectx by its first ancestor.
                 """
                 if f1 == f2:
                     return True  # a match

                 walk1 = f1.ancestors()
                 walk2 = f2.ancestors()
                 try:
                     # pre-step: move off any filectx that has no linkrev yet
                     if f1.linkrev() is None:
                         f1 = next(walk1)
                     if f2.linkrev() is None:
                         f2 = next(walk2)

                     while True:
                         rev1 = f1.linkrev()
                         rev2 = f2.linkrev()
                         if rev1 > rev2:
                             f1 = next(walk1)
                             continue
                         if rev2 > rev1:
                             f2 = next(walk2)
                             continue
                         # f1 and f2 point to files in the same linkrev:
                         # true if they point to the same file
                         return f1 == f2
                 except StopIteration:
                     # one of the histories ran out before the two met
                     return False
             def graftcopies(wctx, ctx, base):
                 """reproduce copies between base and ctx in the wctx

                 Differences from mergecopies():

                 - only copies between base and ctx are considered; copies between
                   base and wctx are ignored
                 - the copies are applied to the working copy instead of just being
                   returned as information

                 That makes it cheaper (especially in the common case of
                 base==ctx.p1()) and useful also when experimental.copytrace=off.

                 merge.update() will have already marked most copies, but it will only
                 mark copies if it thinks the source files are related (see
                 merge._related()). It will also not mark copies if the file wasn't
                 modified on the local side. This function adds the copies that were
                 "missed" by merge.update().
                 """
                 grafted = pathcopies(base, ctx)
                 # drop the entries that are not valid between the working copy's
                 # parent and the working copy itself
                 parent = wctx.p1()
                 _filter(parent, wctx, grafted)
                 for dst, src in pycompat.iteritems(grafted):
                     target = wctx[dst]
                     target.markcopied(src)