##// END OF EJS Templates
copies: stop attempt to avoid extra dict copies around branching...
marmoute -
r46800:cb8b2ee8 default
parent child Browse files
Show More
@@ -1,1228 +1,1225 b''
1 # coding: utf8
1 # coding: utf8
2 # copies.py - copy detection for Mercurial
2 # copies.py - copy detection for Mercurial
3 #
3 #
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import collections
11 import collections
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15 from .node import (
15 from .node import (
16 nullid,
16 nullid,
17 nullrev,
17 nullrev,
18 )
18 )
19
19
20 from . import (
20 from . import (
21 match as matchmod,
21 match as matchmod,
22 pathutil,
22 pathutil,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28
28
29 from .utils import stringutil
29 from .utils import stringutil
30
30
31 from .revlogutils import (
31 from .revlogutils import (
32 flagutil,
32 flagutil,
33 sidedata as sidedatamod,
33 sidedata as sidedatamod,
34 )
34 )
35
35
36 rustmod = policy.importrust("copy_tracing")
36 rustmod = policy.importrust("copy_tracing")
37
37
38
38
39 def _filter(src, dst, t):
39 def _filter(src, dst, t):
40 """filters out invalid copies after chaining"""
40 """filters out invalid copies after chaining"""
41
41
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
44 # in the following table (not including trivial cases). For example, case 2
44 # in the following table (not including trivial cases). For example, case 2
45 # is where a file existed in 'src' and remained under that name in 'mid' and
45 # is where a file existed in 'src' and remained under that name in 'mid' and
46 # then was renamed between 'mid' and 'dst'.
46 # then was renamed between 'mid' and 'dst'.
47 #
47 #
48 # case src mid dst result
48 # case src mid dst result
49 # 1 x y - -
49 # 1 x y - -
50 # 2 x y y x->y
50 # 2 x y y x->y
51 # 3 x y x -
51 # 3 x y x -
52 # 4 x y z x->z
52 # 4 x y z x->z
53 # 5 - x y -
53 # 5 - x y -
54 # 6 x x y x->y
54 # 6 x x y x->y
55 #
55 #
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
58 # between 5 and 6, so it includes all cases in its result.
58 # between 5 and 6, so it includes all cases in its result.
59 # Cases 1, 3, and 5 are then removed by _filter().
59 # Cases 1, 3, and 5 are then removed by _filter().
60
60
61 for k, v in list(t.items()):
61 for k, v in list(t.items()):
62 # remove copies from files that didn't exist
62 # remove copies from files that didn't exist
63 if v not in src:
63 if v not in src:
64 del t[k]
64 del t[k]
65 # remove criss-crossed copies
65 # remove criss-crossed copies
66 elif k in src and v in dst:
66 elif k in src and v in dst:
67 del t[k]
67 del t[k]
68 # remove copies to files that were then removed
68 # remove copies to files that were then removed
69 elif k not in dst:
69 elif k not in dst:
70 del t[k]
70 del t[k]
71
71
72
72
73 def _chain(prefix, suffix):
73 def _chain(prefix, suffix):
74 """chain two sets of copies 'prefix' and 'suffix'"""
74 """chain two sets of copies 'prefix' and 'suffix'"""
75 result = prefix.copy()
75 result = prefix.copy()
76 for key, value in pycompat.iteritems(suffix):
76 for key, value in pycompat.iteritems(suffix):
77 result[key] = prefix.get(value, value)
77 result[key] = prefix.get(value, value)
78 return result
78 return result
79
79
80
80
81 def _tracefile(fctx, am, basemf):
81 def _tracefile(fctx, am, basemf):
82 """return file context that is the ancestor of fctx present in ancestor
82 """return file context that is the ancestor of fctx present in ancestor
83 manifest am
83 manifest am
84
84
85 Note: we used to try and stop after a given limit, however checking if that
85 Note: we used to try and stop after a given limit, however checking if that
86 limit is reached turned out to be very expensive. we are better off
86 limit is reached turned out to be very expensive. we are better off
87 disabling that feature."""
87 disabling that feature."""
88
88
89 for f in fctx.ancestors():
89 for f in fctx.ancestors():
90 path = f.path()
90 path = f.path()
91 if am.get(path, None) == f.filenode():
91 if am.get(path, None) == f.filenode():
92 return path
92 return path
93 if basemf and basemf.get(path, None) == f.filenode():
93 if basemf and basemf.get(path, None) == f.filenode():
94 return path
94 return path
95
95
96
96
97 def _dirstatecopies(repo, match=None):
97 def _dirstatecopies(repo, match=None):
98 ds = repo.dirstate
98 ds = repo.dirstate
99 c = ds.copies().copy()
99 c = ds.copies().copy()
100 for k in list(c):
100 for k in list(c):
101 if ds[k] not in b'anm' or (match and not match(k)):
101 if ds[k] not in b'anm' or (match and not match(k)):
102 del c[k]
102 del c[k]
103 return c
103 return c
104
104
105
105
106 def _computeforwardmissing(a, b, match=None):
106 def _computeforwardmissing(a, b, match=None):
107 """Computes which files are in b but not a.
107 """Computes which files are in b but not a.
108 This is its own function so extensions can easily wrap this call to see what
108 This is its own function so extensions can easily wrap this call to see what
109 files _forwardcopies is about to process.
109 files _forwardcopies is about to process.
110 """
110 """
111 ma = a.manifest()
111 ma = a.manifest()
112 mb = b.manifest()
112 mb = b.manifest()
113 return mb.filesnotin(ma, match=match)
113 return mb.filesnotin(ma, match=match)
114
114
115
115
116 def usechangesetcentricalgo(repo):
116 def usechangesetcentricalgo(repo):
117 """Checks if we should use changeset-centric copy algorithms"""
117 """Checks if we should use changeset-centric copy algorithms"""
118 if repo.filecopiesmode == b'changeset-sidedata':
118 if repo.filecopiesmode == b'changeset-sidedata':
119 return True
119 return True
120 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
120 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
121 changesetsource = (b'changeset-only', b'compatibility')
121 changesetsource = (b'changeset-only', b'compatibility')
122 return readfrom in changesetsource
122 return readfrom in changesetsource
123
123
124
124
125 def _committedforwardcopies(a, b, base, match):
125 def _committedforwardcopies(a, b, base, match):
126 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
126 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
127 # files might have to be traced back to the fctx parent of the last
127 # files might have to be traced back to the fctx parent of the last
128 # one-side-only changeset, but not further back than that
128 # one-side-only changeset, but not further back than that
129 repo = a._repo
129 repo = a._repo
130
130
131 if usechangesetcentricalgo(repo):
131 if usechangesetcentricalgo(repo):
132 return _changesetforwardcopies(a, b, match)
132 return _changesetforwardcopies(a, b, match)
133
133
134 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
134 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
135 dbg = repo.ui.debug
135 dbg = repo.ui.debug
136 if debug:
136 if debug:
137 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
137 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
138 am = a.manifest()
138 am = a.manifest()
139 basemf = None if base is None else base.manifest()
139 basemf = None if base is None else base.manifest()
140
140
141 # find where new files came from
141 # find where new files came from
142 # we currently don't try to find where old files went, too expensive
142 # we currently don't try to find where old files went, too expensive
143 # this means we can miss a case like 'hg rm b; hg cp a b'
143 # this means we can miss a case like 'hg rm b; hg cp a b'
144 cm = {}
144 cm = {}
145
145
146 # Computing the forward missing is quite expensive on large manifests, since
146 # Computing the forward missing is quite expensive on large manifests, since
147 # it compares the entire manifests. We can optimize it in the common use
147 # it compares the entire manifests. We can optimize it in the common use
148 # case of computing what copies are in a commit versus its parent (like
148 # case of computing what copies are in a commit versus its parent (like
149 # during a rebase or histedit). Note, we exclude merge commits from this
149 # during a rebase or histedit). Note, we exclude merge commits from this
150 # optimization, since the ctx.files() for a merge commit is not correct for
150 # optimization, since the ctx.files() for a merge commit is not correct for
151 # this comparison.
151 # this comparison.
152 forwardmissingmatch = match
152 forwardmissingmatch = match
153 if b.p1() == a and b.p2().node() == nullid:
153 if b.p1() == a and b.p2().node() == nullid:
154 filesmatcher = matchmod.exact(b.files())
154 filesmatcher = matchmod.exact(b.files())
155 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
155 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
156 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
156 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
157
157
158 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
158 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
159
159
160 if debug:
160 if debug:
161 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
161 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
162
162
163 for f in sorted(missing):
163 for f in sorted(missing):
164 if debug:
164 if debug:
165 dbg(b'debug.copies: tracing file: %s\n' % f)
165 dbg(b'debug.copies: tracing file: %s\n' % f)
166 fctx = b[f]
166 fctx = b[f]
167 fctx._ancestrycontext = ancestrycontext
167 fctx._ancestrycontext = ancestrycontext
168
168
169 if debug:
169 if debug:
170 start = util.timer()
170 start = util.timer()
171 opath = _tracefile(fctx, am, basemf)
171 opath = _tracefile(fctx, am, basemf)
172 if opath:
172 if opath:
173 if debug:
173 if debug:
174 dbg(b'debug.copies: rename of: %s\n' % opath)
174 dbg(b'debug.copies: rename of: %s\n' % opath)
175 cm[f] = opath
175 cm[f] = opath
176 if debug:
176 if debug:
177 dbg(
177 dbg(
178 b'debug.copies: time: %f seconds\n'
178 b'debug.copies: time: %f seconds\n'
179 % (util.timer() - start)
179 % (util.timer() - start)
180 )
180 )
181 return cm
181 return cm
182
182
183
183
184 def _revinfo_getter(repo, match):
184 def _revinfo_getter(repo, match):
185 """returns a function that returns the following data given a <rev>"
185 """returns a function that returns the following data given a <rev>"
186
186
187 * p1: revision number of first parent
187 * p1: revision number of first parent
188 * p2: revision number of first parent
188 * p2: revision number of first parent
189 * changes: a ChangingFiles object
189 * changes: a ChangingFiles object
190 """
190 """
191 cl = repo.changelog
191 cl = repo.changelog
192 parents = cl.parentrevs
192 parents = cl.parentrevs
193 flags = cl.flags
193 flags = cl.flags
194
194
195 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
195 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
196
196
197 changelogrevision = cl.changelogrevision
197 changelogrevision = cl.changelogrevision
198
198
199 if rustmod is not None:
199 if rustmod is not None:
200
200
201 def revinfo(rev):
201 def revinfo(rev):
202 p1, p2 = parents(rev)
202 p1, p2 = parents(rev)
203 if flags(rev) & HASCOPIESINFO:
203 if flags(rev) & HASCOPIESINFO:
204 raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
204 raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
205 else:
205 else:
206 raw = None
206 raw = None
207 return (p1, p2, raw)
207 return (p1, p2, raw)
208
208
209 else:
209 else:
210
210
211 def revinfo(rev):
211 def revinfo(rev):
212 p1, p2 = parents(rev)
212 p1, p2 = parents(rev)
213 if flags(rev) & HASCOPIESINFO:
213 if flags(rev) & HASCOPIESINFO:
214 changes = changelogrevision(rev).changes
214 changes = changelogrevision(rev).changes
215 else:
215 else:
216 changes = None
216 changes = None
217 return (p1, p2, changes)
217 return (p1, p2, changes)
218
218
219 return revinfo
219 return revinfo
220
220
221
221
222 def cached_is_ancestor(is_ancestor):
222 def cached_is_ancestor(is_ancestor):
223 """return a cached version of is_ancestor"""
223 """return a cached version of is_ancestor"""
224 cache = {}
224 cache = {}
225
225
226 def _is_ancestor(anc, desc):
226 def _is_ancestor(anc, desc):
227 if anc > desc:
227 if anc > desc:
228 return False
228 return False
229 elif anc == desc:
229 elif anc == desc:
230 return True
230 return True
231 key = (anc, desc)
231 key = (anc, desc)
232 ret = cache.get(key)
232 ret = cache.get(key)
233 if ret is None:
233 if ret is None:
234 ret = cache[key] = is_ancestor(anc, desc)
234 ret = cache[key] = is_ancestor(anc, desc)
235 return ret
235 return ret
236
236
237 return _is_ancestor
237 return _is_ancestor
238
238
239
239
240 def _changesetforwardcopies(a, b, match):
240 def _changesetforwardcopies(a, b, match):
241 if a.rev() in (nullrev, b.rev()):
241 if a.rev() in (nullrev, b.rev()):
242 return {}
242 return {}
243
243
244 repo = a.repo().unfiltered()
244 repo = a.repo().unfiltered()
245 children = {}
245 children = {}
246
246
247 cl = repo.changelog
247 cl = repo.changelog
248 isancestor = cl.isancestorrev
248 isancestor = cl.isancestorrev
249
249
250 # To track rename from "A" to B, we need to gather all parent β†’ children
250 # To track rename from "A" to B, we need to gather all parent β†’ children
251 # edges that are contains in `::B` but not in `::A`.
251 # edges that are contains in `::B` but not in `::A`.
252 #
252 #
253 #
253 #
254 # To do so, we need to gather all revisions exclusiveΒΉ to "B" (ieΒΉ: `::b -
254 # To do so, we need to gather all revisions exclusiveΒΉ to "B" (ieΒΉ: `::b -
255 # ::a`) and also all the "roots point", ie the parents of the exclusive set
255 # ::a`) and also all the "roots point", ie the parents of the exclusive set
256 # that belong to ::a. These are exactly all the revisions needed to express
256 # that belong to ::a. These are exactly all the revisions needed to express
257 # the parent β†’ children we need to combine.
257 # the parent β†’ children we need to combine.
258 #
258 #
259 # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
259 # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
260 # excluding paths that leads to roots that are not ancestors of `a`. We
260 # excluding paths that leads to roots that are not ancestors of `a`. We
261 # keep this out of the explanation because it is hard enough without this special case..
261 # keep this out of the explanation because it is hard enough without this special case..
262
262
263 parents = cl._uncheckedparentrevs
263 parents = cl._uncheckedparentrevs
264 graph_roots = (nullrev, nullrev)
264 graph_roots = (nullrev, nullrev)
265
265
266 ancestors = cl.ancestors([a.rev()], inclusive=True)
266 ancestors = cl.ancestors([a.rev()], inclusive=True)
267 revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
267 revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
268 roots = set()
268 roots = set()
269 has_graph_roots = False
269 has_graph_roots = False
270
270
271 # iterate over `only(B, A)`
271 # iterate over `only(B, A)`
272 for r in revs:
272 for r in revs:
273 ps = parents(r)
273 ps = parents(r)
274 if ps == graph_roots:
274 if ps == graph_roots:
275 has_graph_roots = True
275 has_graph_roots = True
276 else:
276 else:
277 p1, p2 = ps
277 p1, p2 = ps
278
278
279 # find all the "root points" (see larger comment above)
279 # find all the "root points" (see larger comment above)
280 if p1 != nullrev and p1 in ancestors:
280 if p1 != nullrev and p1 in ancestors:
281 roots.add(p1)
281 roots.add(p1)
282 if p2 != nullrev and p2 in ancestors:
282 if p2 != nullrev and p2 in ancestors:
283 roots.add(p2)
283 roots.add(p2)
284 if not roots:
284 if not roots:
285 # no common revision to track copies from
285 # no common revision to track copies from
286 return {}
286 return {}
287 if has_graph_roots:
287 if has_graph_roots:
288 # this deal with the special case mentionned in the [1] footnotes. We
288 # this deal with the special case mentionned in the [1] footnotes. We
289 # must filter out revisions that leads to non-common graphroots.
289 # must filter out revisions that leads to non-common graphroots.
290 roots = list(roots)
290 roots = list(roots)
291 m = min(roots)
291 m = min(roots)
292 h = [b.rev()]
292 h = [b.rev()]
293 roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
293 roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
294 roots_to_head = set(roots_to_head)
294 roots_to_head = set(roots_to_head)
295 revs = [r for r in revs if r in roots_to_head]
295 revs = [r for r in revs if r in roots_to_head]
296
296
297 if repo.filecopiesmode == b'changeset-sidedata':
297 if repo.filecopiesmode == b'changeset-sidedata':
298 # When using side-data, we will process the edges "from" the children.
298 # When using side-data, we will process the edges "from" the children.
299 # We iterate over the childre, gathering previous collected data for
299 # We iterate over the childre, gathering previous collected data for
300 # the parents. Do know when the parents data is no longer necessary, we
300 # the parents. Do know when the parents data is no longer necessary, we
301 # keep a counter of how many children each revision has.
301 # keep a counter of how many children each revision has.
302 #
302 #
303 # An interresting property of `children_count` is that it only contains
303 # An interresting property of `children_count` is that it only contains
304 # revision that will be relevant for a edge of the graph. So if a
304 # revision that will be relevant for a edge of the graph. So if a
305 # children has parent not in `children_count`, that edges should not be
305 # children has parent not in `children_count`, that edges should not be
306 # processed.
306 # processed.
307 children_count = dict((r, 0) for r in roots)
307 children_count = dict((r, 0) for r in roots)
308 for r in revs:
308 for r in revs:
309 for p in cl.parentrevs(r):
309 for p in cl.parentrevs(r):
310 if p == nullrev:
310 if p == nullrev:
311 continue
311 continue
312 children_count[r] = 0
312 children_count[r] = 0
313 if p in children_count:
313 if p in children_count:
314 children_count[p] += 1
314 children_count[p] += 1
315 revinfo = _revinfo_getter(repo, match)
315 revinfo = _revinfo_getter(repo, match)
316 return _combine_changeset_copies(
316 return _combine_changeset_copies(
317 revs, children_count, b.rev(), revinfo, match, isancestor
317 revs, children_count, b.rev(), revinfo, match, isancestor
318 )
318 )
319 else:
319 else:
320 # When not using side-data, we will process the edges "from" the parent.
320 # When not using side-data, we will process the edges "from" the parent.
321 # so we need a full mapping of the parent -> children relation.
321 # so we need a full mapping of the parent -> children relation.
322 children = dict((r, []) for r in roots)
322 children = dict((r, []) for r in roots)
323 for r in revs:
323 for r in revs:
324 for p in cl.parentrevs(r):
324 for p in cl.parentrevs(r):
325 if p == nullrev:
325 if p == nullrev:
326 continue
326 continue
327 children[r] = []
327 children[r] = []
328 if p in children:
328 if p in children:
329 children[p].append(r)
329 children[p].append(r)
330 x = revs.pop()
330 x = revs.pop()
331 assert x == b.rev()
331 assert x == b.rev()
332 revs.extend(roots)
332 revs.extend(roots)
333 revs.sort()
333 revs.sort()
334
334
335 revinfo = _revinfo_getter_extra(repo)
335 revinfo = _revinfo_getter_extra(repo)
336 return _combine_changeset_copies_extra(
336 return _combine_changeset_copies_extra(
337 revs, children, b.rev(), revinfo, match, isancestor
337 revs, children, b.rev(), revinfo, match, isancestor
338 )
338 )
339
339
340
340
341 def _combine_changeset_copies(
341 def _combine_changeset_copies(
342 revs, children_count, targetrev, revinfo, match, isancestor
342 revs, children_count, targetrev, revinfo, match, isancestor
343 ):
343 ):
344 """combine the copies information for each item of iterrevs
344 """combine the copies information for each item of iterrevs
345
345
346 revs: sorted iterable of revision to visit
346 revs: sorted iterable of revision to visit
347 children_count: a {parent: <number-of-relevant-children>} mapping.
347 children_count: a {parent: <number-of-relevant-children>} mapping.
348 targetrev: the final copies destination revision (not in iterrevs)
348 targetrev: the final copies destination revision (not in iterrevs)
349 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
349 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
350 match: a matcher
350 match: a matcher
351
351
352 It returns the aggregated copies information for `targetrev`.
352 It returns the aggregated copies information for `targetrev`.
353 """
353 """
354
354
355 alwaysmatch = match.always()
355 alwaysmatch = match.always()
356
356
357 if rustmod is not None:
357 if rustmod is not None:
358 final_copies = rustmod.combine_changeset_copies(
358 final_copies = rustmod.combine_changeset_copies(
359 list(revs), children_count, targetrev, revinfo, isancestor
359 list(revs), children_count, targetrev, revinfo, isancestor
360 )
360 )
361 else:
361 else:
362 isancestor = cached_is_ancestor(isancestor)
362 isancestor = cached_is_ancestor(isancestor)
363
363
364 all_copies = {}
364 all_copies = {}
365 # iterate over all the "children" side of copy tracing "edge"
365 # iterate over all the "children" side of copy tracing "edge"
366 for current_rev in revs:
366 for current_rev in revs:
367 p1, p2, changes = revinfo(current_rev)
367 p1, p2, changes = revinfo(current_rev)
368 current_copies = None
368 current_copies = None
369 # iterate over all parents to chain the existing data with the
369 # iterate over all parents to chain the existing data with the
370 # data from the parent β†’ child edge.
370 # data from the parent β†’ child edge.
371 for parent, parent_rev in ((1, p1), (2, p2)):
371 for parent, parent_rev in ((1, p1), (2, p2)):
372 if parent_rev == nullrev:
372 if parent_rev == nullrev:
373 continue
373 continue
374 remaining_children = children_count.get(parent_rev)
374 remaining_children = children_count.get(parent_rev)
375 if remaining_children is None:
375 if remaining_children is None:
376 continue
376 continue
377 remaining_children -= 1
377 remaining_children -= 1
378 children_count[parent_rev] = remaining_children
378 children_count[parent_rev] = remaining_children
379 if remaining_children:
379 if remaining_children:
380 copies = all_copies.get(parent_rev, None)
380 copies = all_copies.get(parent_rev, None)
381 else:
381 else:
382 copies = all_copies.pop(parent_rev, None)
382 copies = all_copies.pop(parent_rev, None)
383
383
384 if copies is None:
384 if copies is None:
385 # this is a root
385 # this is a root
386 copies = {}
386 newcopies = copies = {}
387
387 elif remaining_children:
388 newcopies = copies.copy()
389 else:
388 newcopies = copies
390 newcopies = copies
389 # chain the data in the edge with the existing data
391 # chain the data in the edge with the existing data
390 if changes is not None:
392 if changes is not None:
391 childcopies = {}
393 childcopies = {}
392 if parent == 1:
394 if parent == 1:
393 childcopies = changes.copied_from_p1
395 childcopies = changes.copied_from_p1
394 elif parent == 2:
396 elif parent == 2:
395 childcopies = changes.copied_from_p2
397 childcopies = changes.copied_from_p2
396
398
397 if childcopies:
399 if childcopies:
398 newcopies = copies.copy()
400 newcopies = copies.copy()
399 for dest, source in pycompat.iteritems(childcopies):
401 for dest, source in pycompat.iteritems(childcopies):
400 prev = copies.get(source)
402 prev = copies.get(source)
401 if prev is not None and prev[1] is not None:
403 if prev is not None and prev[1] is not None:
402 source = prev[1]
404 source = prev[1]
403 newcopies[dest] = (current_rev, source)
405 newcopies[dest] = (current_rev, source)
404 assert newcopies is not copies
406 assert newcopies is not copies
405 if changes.removed:
407 if changes.removed:
406 if newcopies is copies:
407 newcopies = copies.copy()
408 for f in changes.removed:
408 for f in changes.removed:
409 if f in newcopies:
409 if f in newcopies:
410 if newcopies is copies:
410 if newcopies is copies:
411 # copy on write to avoid affecting potential other
411 # copy on write to avoid affecting potential other
412 # branches. when there are no other branches, this
412 # branches. when there are no other branches, this
413 # could be avoided.
413 # could be avoided.
414 newcopies = copies.copy()
414 newcopies = copies.copy()
415 newcopies[f] = (current_rev, None)
415 newcopies[f] = (current_rev, None)
416 # check potential need to combine the data from another parent (for
416 # check potential need to combine the data from another parent (for
417 # that child). See comment below for details.
417 # that child). See comment below for details.
418 if current_copies is None:
418 if current_copies is None:
419 current_copies = newcopies
419 current_copies = newcopies
420 elif current_copies is newcopies:
421 # nothing to merge:
422 pass
423 else:
420 else:
424 # we are the second parent to work on c, we need to merge our
421 # we are the second parent to work on c, we need to merge our
425 # work with the other.
422 # work with the other.
426 #
423 #
427 # In case of conflict, parent 1 take precedence over parent 2.
424 # In case of conflict, parent 1 take precedence over parent 2.
428 # This is an arbitrary choice made anew when implementing
425 # This is an arbitrary choice made anew when implementing
429 # changeset based copies. It was made without regards with
426 # changeset based copies. It was made without regards with
430 # potential filelog related behavior.
427 # potential filelog related behavior.
431 assert parent == 2
428 assert parent == 2
432 current_copies = _merge_copies_dict(
429 current_copies = _merge_copies_dict(
433 newcopies, current_copies, isancestor, changes
430 newcopies, current_copies, isancestor, changes
434 )
431 )
435 all_copies[current_rev] = current_copies
432 all_copies[current_rev] = current_copies
436
433
437 # filter out internal details and return a {dest: source mapping}
434 # filter out internal details and return a {dest: source mapping}
438 final_copies = {}
435 final_copies = {}
439 for dest, (tt, source) in all_copies[targetrev].items():
436 for dest, (tt, source) in all_copies[targetrev].items():
440 if source is not None:
437 if source is not None:
441 final_copies[dest] = source
438 final_copies[dest] = source
442 if not alwaysmatch:
439 if not alwaysmatch:
443 for filename in list(final_copies.keys()):
440 for filename in list(final_copies.keys()):
444 if not match(filename):
441 if not match(filename):
445 del final_copies[filename]
442 del final_copies[filename]
446 return final_copies
443 return final_copies
447
444
448
445
449 # constant to decide which side to pick with _merge_copies_dict
446 # constant to decide which side to pick with _merge_copies_dict
450 PICK_MINOR = 0
447 PICK_MINOR = 0
451 PICK_MAJOR = 1
448 PICK_MAJOR = 1
452 PICK_EITHER = 2
449 PICK_EITHER = 2
453
450
454
451
455 def _merge_copies_dict(minor, major, isancestor, changes):
452 def _merge_copies_dict(minor, major, isancestor, changes):
456 """merge two copies-mapping together, minor and major
453 """merge two copies-mapping together, minor and major
457
454
458 In case of conflict, value from "major" will be picked.
455 In case of conflict, value from "major" will be picked.
459
456
460 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
457 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
461 ancestors of `high_rev`,
458 ancestors of `high_rev`,
462
459
463 - `ismerged(path)`: callable return True if `path` have been merged in the
460 - `ismerged(path)`: callable return True if `path` have been merged in the
464 current revision,
461 current revision,
465
462
466 return the resulting dict (in practice, the "minor" object, updated)
463 return the resulting dict (in practice, the "minor" object, updated)
467 """
464 """
468 for dest, value in major.items():
465 for dest, value in major.items():
469 other = minor.get(dest)
466 other = minor.get(dest)
470 if other is None:
467 if other is None:
471 minor[dest] = value
468 minor[dest] = value
472 else:
469 else:
473 pick = _compare_values(changes, isancestor, dest, other, value)
470 pick = _compare_values(changes, isancestor, dest, other, value)
474 if pick == PICK_MAJOR:
471 if pick == PICK_MAJOR:
475 minor[dest] = value
472 minor[dest] = value
476 return minor
473 return minor
477
474
478
475
479 def _compare_values(changes, isancestor, dest, minor, major):
476 def _compare_values(changes, isancestor, dest, minor, major):
480 """compare two value within a _merge_copies_dict loop iteration"""
477 """compare two value within a _merge_copies_dict loop iteration"""
481 major_tt, major_value = major
478 major_tt, major_value = major
482 minor_tt, minor_value = minor
479 minor_tt, minor_value = minor
483
480
484 # evacuate some simple case first:
481 # evacuate some simple case first:
485 if major_tt == minor_tt:
482 if major_tt == minor_tt:
486 # if it comes from the same revision it must be the same value
483 # if it comes from the same revision it must be the same value
487 assert major_value == minor_value
484 assert major_value == minor_value
488 return PICK_EITHER
485 return PICK_EITHER
489 elif major[1] == minor[1]:
486 elif major[1] == minor[1]:
490 return PICK_EITHER
487 return PICK_EITHER
491
488
492 # actual merging needed: content from "major" wins, unless it is older than
489 # actual merging needed: content from "major" wins, unless it is older than
493 # the branch point or there is a merge
490 # the branch point or there is a merge
494 elif changes is not None and major[1] is None and dest in changes.salvaged:
491 elif changes is not None and major[1] is None and dest in changes.salvaged:
495 return PICK_MINOR
492 return PICK_MINOR
496 elif changes is not None and minor[1] is None and dest in changes.salvaged:
493 elif changes is not None and minor[1] is None and dest in changes.salvaged:
497 return PICK_MAJOR
494 return PICK_MAJOR
498 elif changes is not None and dest in changes.merged:
495 elif changes is not None and dest in changes.merged:
499 return PICK_MAJOR
496 return PICK_MAJOR
500 elif not isancestor(major_tt, minor_tt):
497 elif not isancestor(major_tt, minor_tt):
501 if major[1] is not None:
498 if major[1] is not None:
502 return PICK_MAJOR
499 return PICK_MAJOR
503 elif isancestor(minor_tt, major_tt):
500 elif isancestor(minor_tt, major_tt):
504 return PICK_MAJOR
501 return PICK_MAJOR
505 return PICK_MINOR
502 return PICK_MINOR
506
503
507
504
def _revinfo_getter_extra(repo):
    """return a function that returns multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        # build a per-revision predicate telling whether a path was merged
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            parents = fctx._filelog.parents(fctx._filenode)
            # a file counts as merged when its filelog entry has two
            # non-null parents
            return sum(1 for n in parents if n != nullid) >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo
545
542
546
543
def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information

    Walks `revs` (ancestors first), propagating a {dst: (rev, src)} mapping
    from each revision to its children, and returns the {dst: src} copies
    relevant for `targetrev`.
    """
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for c in children[r]:
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            # pick the copy data recorded against the parent we came from
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                # copy-on-write: never mutate the mapping shared with other
                # branches
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    # chain through an earlier rename if one exists
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    # record the deletion with a None source
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                all_copies[c] = newcopies

    # keep only live copies (source still known) for the target revision
    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
615
612
616
613
617 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
614 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
618 """version of `_merge_copies_dict` that works with the Google
615 """version of `_merge_copies_dict` that works with the Google
619 specific "extra" based storage for copy information"""
616 specific "extra" based storage for copy information"""
620 for dest, value in major.items():
617 for dest, value in major.items():
621 other = minor.get(dest)
618 other = minor.get(dest)
622 if other is None:
619 if other is None:
623 minor[dest] = value
620 minor[dest] = value
624 else:
621 else:
625 new_tt = value[0]
622 new_tt = value[0]
626 other_tt = other[0]
623 other_tt = other[0]
627 if value[1] == other[1]:
624 if value[1] == other[1]:
628 continue
625 continue
629 # content from "major" wins, unless it is older
626 # content from "major" wins, unless it is older
630 # than the branch point or there is a merge
627 # than the branch point or there is a merge
631 if (
628 if (
632 new_tt == other_tt
629 new_tt == other_tt
633 or not isancestor(new_tt, other_tt)
630 or not isancestor(new_tt, other_tt)
634 or ismerged(dest)
631 or ismerged(dest)
635 ):
632 ):
636 minor[dest] = value
633 minor[dest] = value
637
634
638
635
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # a committed revision carries all its copy data itself
    if b.rev() is not None:
        return _committedforwardcopies(a, b, base, match)
    # b is the working copy: gather copies up to its first parent, then
    # complete them with what the dirstate records
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
653
650
654
651
def _backwardrenames(a, b, match):
    """find renames from b back to a as a {src@a: dst@b} mapping"""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    reversed_renames = {}
    for dst, src in sorted(pycompat.iteritems(forward)):
        if match and not match(src):
            continue
        # keep only renames: a plain copy leaves the source present in a
        if src in a:
            continue
        reversed_renames[src] = dst
    return reversed_renames
675
672
676
673
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )

    # trivial comparisons cannot carry copies
    if x == y or not x or not y:
        return {}

    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)

    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: a plain forward walk suffices
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        # y is an ancestor of x: walk backward instead
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # unrelated: go back to the common ancestor, then forward to y
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = x if a.rev() != nullrev else None
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
713
710
714
711
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    | o        ---> 3 commit that modifies a.txt
    | /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns a 3-tuple (branch_copies1, branch_copies2, diverge) where:

    "branch_copies1" and "branch_copies2" are instances of branch_copies,
    one for each side of the merge.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        # the dirstate already knows the copies made in the working copy
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
783
780
784
781
def _isfullcopytraceable(repo, c1, base):
    """Checks that if base, source and destination are all no-public branches,
    if yes let's use the full copytrace algorithm for increased capabilities
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        # working copy: reason about its first parent instead
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        return False
    limit = repo.ui.configint(
        b'experimental', b'copytrace.sourcecommitlimit'
    )
    nb_commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
    return nb_commits < limit
803
800
804
801
805 def _checksinglesidecopies(
802 def _checksinglesidecopies(
806 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
803 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
807 ):
804 ):
808 if src not in m2:
805 if src not in m2:
809 # deleted on side 2
806 # deleted on side 2
810 if src not in m1:
807 if src not in m1:
811 # renamed on side 1, deleted on side 2
808 # renamed on side 1, deleted on side 2
812 renamedelete[src] = dsts1
809 renamedelete[src] = dsts1
813 elif src not in mb:
810 elif src not in mb:
814 # Work around the "short-circuit to avoid issues with merge states"
811 # Work around the "short-circuit to avoid issues with merge states"
815 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
812 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
816 # destination doesn't exist in y.
813 # destination doesn't exist in y.
817 pass
814 pass
818 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
815 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
819 return
816 return
820 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
817 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
821 # modified on side 2
818 # modified on side 2
822 for dst in dsts1:
819 for dst in dsts1:
823 copy[dst] = src
820 copy[dst] = src
824
821
825
822
class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # every mapping defaults to a fresh, independently mutable dict
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        template = (
            '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n'
            ' movewithdir=%r\n>'
        )
        return template % (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
860
857
861
858
def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        # no copies on either side: nothing to reconcile
        return branch_copies(), branch_copies(), {}

    # invert {dst: src} into {src: [dsts]} to reason per copy source
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    # the cache holds (u1, u2) lazily: files added on only one side
    cache = []

    def _get_addedfiles(idx):
        if not cache:
            addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
            addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
            u1 = sorted(addedinm1 - addedinm2)
            u2 = sorted(addedinm2 - addedinm1)
            cache.extend((u1, u2))
        return cache[idx]

    u1fn = lambda: _get_addedfiles(0)
    u2fn = lambda: _get_addedfiles(1)
    if repo.ui.debugflag:
        u1 = u1fn()
        u2 = u2fn()

        header = b" unmatched files in %s"
        if u1:
            repo.ui.debug(
                b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
            )
        if u2:
            repo.ui.debug(
                b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
            )

        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b" on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge
1000
997
1001
998
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Detect wholesale directory moves and files that should follow them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
        merge.manifestmerge() won't care about
    addedfilesfn: function returning added files on the other side (compared
        to ctx)

    Returns a pair (dirmove, movewithdir): dirmove maps source directory
    prefixes (with a trailing '/') to destination prefixes, movewithdir maps
    individual added files to the path they should move to.
    """
    # Map each candidate source directory to its destination; directories
    # proven uninteresting are collected in 'rejected'.
    rejected = set()
    dirmove = {}

    # A directory move is suspected when every copied file from a directory
    # ended up in the same new directory.
    for dst, src in pycompat.iteritems(fullcopy):
        src_dir = pathutil.dirname(src)
        dst_dir = pathutil.dirname(dst)
        if src_dir in rejected:
            # already known to be uninteresting
            continue
        if ctx.hasdir(src_dir) and ctx.hasdir(dst_dir):
            # some files stayed behind: the directory wasn't fully moved
            rejected.add(src_dir)
        elif dirmove.get(src_dir, dst_dir) != dst_dir:
            # files from the same directory scattered to different places
            rejected.add(src_dir)
        else:
            # looks good so far
            dirmove[src_dir] = dst_dir

    for bad in rejected:
        dirmove.pop(bad, None)
    del rejected

    if not dirmove:
        return {}, {}

    # Normalize to prefix form so a simple startswith() matches whole
    # path components below.
    dirmove = {
        src_dir + b"/": dst_dir + b"/"
        for src_dir, dst_dir in pycompat.iteritems(dirmove)
    }

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # Files added on the other side inside a moved directory should follow
    # the move, unless an explicit copy already accounts for the target.
    for f in addedfilesfn():
        if f in fullcopy:
            continue
        for d in dirmove:
            if not f.startswith(d):
                continue
            # new file added in a directory that was moved, move it
            df = dirmove[d] + f[len(d) :]
            if df not in copy:
                movewithdir[f] = df
                repo.ui.debug(
                    b" pending file src: '%s' -> dst: '%s'\n" % (f, df)
                )
            break

    return dirmove, movewithdir
1065
1062
1066
1063
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # rev() is None for a working context; operate on its first parent
    # instead (presumably these start out as workingctx - TODO confirm).
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # Collect every file touched between base and c2, bailing out to the
    # full algorithm as soon as a merge commit shows up on that path.
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # Copies on the c2 side come from real copy records, not heuristics;
    # keep only those whose source still exists in c1's manifest.
    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = [
        f for f in changedfiles if f not in m1 and f in base and f in c2
    ]

    copies1 = {}
    if missingfiles:
        # Index files added on the c1 side by basename and by dirname so each
        # missing file only inspects plausible rename targets.
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basenametofilename[os.path.basename(f)].append(f)
            dirnametofilename[os.path.dirname(f)].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (the limit is loop-invariant: read it once, not once per file)
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            movecandidates = (
                basenametofilename[basename] + dirnametofilename[dirname]
            )
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1174
1171
1175
1172
1176 def _related(f1, f2):
1173 def _related(f1, f2):
1177 """return True if f1 and f2 filectx have a common ancestor
1174 """return True if f1 and f2 filectx have a common ancestor
1178
1175
1179 Walk back to common ancestor to see if the two files originate
1176 Walk back to common ancestor to see if the two files originate
1180 from the same file. Since workingfilectx's rev() is None it messes
1177 from the same file. Since workingfilectx's rev() is None it messes
1181 up the integer comparison logic, hence the pre-step check for
1178 up the integer comparison logic, hence the pre-step check for
1182 None (f1 and f2 can only be workingfilectx's initially).
1179 None (f1 and f2 can only be workingfilectx's initially).
1183 """
1180 """
1184
1181
1185 if f1 == f2:
1182 if f1 == f2:
1186 return True # a match
1183 return True # a match
1187
1184
1188 g1, g2 = f1.ancestors(), f2.ancestors()
1185 g1, g2 = f1.ancestors(), f2.ancestors()
1189 try:
1186 try:
1190 f1r, f2r = f1.linkrev(), f2.linkrev()
1187 f1r, f2r = f1.linkrev(), f2.linkrev()
1191
1188
1192 if f1r is None:
1189 if f1r is None:
1193 f1 = next(g1)
1190 f1 = next(g1)
1194 if f2r is None:
1191 if f2r is None:
1195 f2 = next(g2)
1192 f2 = next(g2)
1196
1193
1197 while True:
1194 while True:
1198 f1r, f2r = f1.linkrev(), f2.linkrev()
1195 f1r, f2r = f1.linkrev(), f2.linkrev()
1199 if f1r > f2r:
1196 if f1r > f2r:
1200 f1 = next(g1)
1197 f1 = next(g1)
1201 elif f2r > f1r:
1198 elif f2r > f1r:
1202 f2 = next(g2)
1199 f2 = next(g2)
1203 else: # f1 and f2 point to files in the same linkrev
1200 else: # f1 and f2 point to files in the same linkrev
1204 return f1 == f2 # true if they point to the same file
1201 return f1 == f2 # true if they point to the same file
1205 except StopIteration:
1202 except StopIteration:
1206 return False
1203 return False
1207
1204
1208
1205
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    # find copies between base and ctx, then drop the ones that don't make
    # sense relative to the working copy's first parent
    found = pathcopies(base, ctx)
    _filter(wctx.p1(), wctx, found)
    # record each surviving copy directly in the working context
    for dst, src in pycompat.iteritems(found):
        wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now