##// END OF EJS Templates
copies: deal with the "same revision" special case earlier...
marmoute -
r46799:ee63c117 default
parent child Browse files
Show More
@@ -1,1224 +1,1228
1 # coding: utf8
1 # coding: utf8
2 # copies.py - copy detection for Mercurial
2 # copies.py - copy detection for Mercurial
3 #
3 #
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import collections
11 import collections
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15 from .node import (
15 from .node import (
16 nullid,
16 nullid,
17 nullrev,
17 nullrev,
18 )
18 )
19
19
20 from . import (
20 from . import (
21 match as matchmod,
21 match as matchmod,
22 pathutil,
22 pathutil,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28
28
29 from .utils import stringutil
29 from .utils import stringutil
30
30
31 from .revlogutils import (
31 from .revlogutils import (
32 flagutil,
32 flagutil,
33 sidedata as sidedatamod,
33 sidedata as sidedatamod,
34 )
34 )
35
35
36 rustmod = policy.importrust("copy_tracing")
36 rustmod = policy.importrust("copy_tracing")
37
37
38
38
39 def _filter(src, dst, t):
39 def _filter(src, dst, t):
40 """filters out invalid copies after chaining"""
40 """filters out invalid copies after chaining"""
41
41
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
44 # in the following table (not including trivial cases). For example, case 2
44 # in the following table (not including trivial cases). For example, case 2
45 # is where a file existed in 'src' and remained under that name in 'mid' and
45 # is where a file existed in 'src' and remained under that name in 'mid' and
46 # then was renamed between 'mid' and 'dst'.
46 # then was renamed between 'mid' and 'dst'.
47 #
47 #
48 # case src mid dst result
48 # case src mid dst result
49 # 1 x y - -
49 # 1 x y - -
50 # 2 x y y x->y
50 # 2 x y y x->y
51 # 3 x y x -
51 # 3 x y x -
52 # 4 x y z x->z
52 # 4 x y z x->z
53 # 5 - x y -
53 # 5 - x y -
54 # 6 x x y x->y
54 # 6 x x y x->y
55 #
55 #
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
58 # between 5 and 6, so it includes all cases in its result.
58 # between 5 and 6, so it includes all cases in its result.
59 # Cases 1, 3, and 5 are then removed by _filter().
59 # Cases 1, 3, and 5 are then removed by _filter().
60
60
61 for k, v in list(t.items()):
61 for k, v in list(t.items()):
62 # remove copies from files that didn't exist
62 # remove copies from files that didn't exist
63 if v not in src:
63 if v not in src:
64 del t[k]
64 del t[k]
65 # remove criss-crossed copies
65 # remove criss-crossed copies
66 elif k in src and v in dst:
66 elif k in src and v in dst:
67 del t[k]
67 del t[k]
68 # remove copies to files that were then removed
68 # remove copies to files that were then removed
69 elif k not in dst:
69 elif k not in dst:
70 del t[k]
70 del t[k]
71
71
72
72
def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    chained = prefix.copy()
    for dest, src in pycompat.iteritems(suffix):
        # a copy recorded in `suffix` may continue a rename started in
        # `prefix`; if so, resolve it back to the original source
        chained[dest] = prefix.get(src, src)
    return chained
79
79
80
80
81 def _tracefile(fctx, am, basemf):
81 def _tracefile(fctx, am, basemf):
82 """return file context that is the ancestor of fctx present in ancestor
82 """return file context that is the ancestor of fctx present in ancestor
83 manifest am
83 manifest am
84
84
85 Note: we used to try and stop after a given limit, however checking if that
85 Note: we used to try and stop after a given limit, however checking if that
86 limit is reached turned out to be very expensive. we are better off
86 limit is reached turned out to be very expensive. we are better off
87 disabling that feature."""
87 disabling that feature."""
88
88
89 for f in fctx.ancestors():
89 for f in fctx.ancestors():
90 path = f.path()
90 path = f.path()
91 if am.get(path, None) == f.filenode():
91 if am.get(path, None) == f.filenode():
92 return path
92 return path
93 if basemf and basemf.get(path, None) == f.filenode():
93 if basemf and basemf.get(path, None) == f.filenode():
94 return path
94 return path
95
95
96
96
97 def _dirstatecopies(repo, match=None):
97 def _dirstatecopies(repo, match=None):
98 ds = repo.dirstate
98 ds = repo.dirstate
99 c = ds.copies().copy()
99 c = ds.copies().copy()
100 for k in list(c):
100 for k in list(c):
101 if ds[k] not in b'anm' or (match and not match(k)):
101 if ds[k] not in b'anm' or (match and not match(k)):
102 del c[k]
102 del c[k]
103 return c
103 return c
104
104
105
105
106 def _computeforwardmissing(a, b, match=None):
106 def _computeforwardmissing(a, b, match=None):
107 """Computes which files are in b but not a.
107 """Computes which files are in b but not a.
108 This is its own function so extensions can easily wrap this call to see what
108 This is its own function so extensions can easily wrap this call to see what
109 files _forwardcopies is about to process.
109 files _forwardcopies is about to process.
110 """
110 """
111 ma = a.manifest()
111 ma = a.manifest()
112 mb = b.manifest()
112 mb = b.manifest()
113 return mb.filesnotin(ma, match=match)
113 return mb.filesnotin(ma, match=match)
114
114
115
115
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # side-data storage always implies changeset-centric tracing
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    # otherwise honor the experimental read-from configuration
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
123
123
124
124
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    copies_map = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for filename in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % filename)
        fctx = b[filename]
        # share the ancestry context so repeated traces reuse the same walk
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        origin = _tracefile(fctx, am, basemf)
        if origin:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % origin)
            copies_map[filename] = origin
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return copies_map
182
182
183
183
def _revinfo_getter(repo, match):
    """returns a function that returns the following data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object
    """
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    if rustmod is not None:
        # the Rust fast path consumes the raw side-data blob directly

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
            else:
                raw = None
            return (p1, p2, raw)

    else:
        # pure-Python path works with the parsed ChangingFiles object

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                changes = changelogrevision(rev).changes
            else:
                changes = None
            return (p1, p2, changes)

    return revinfo
220
220
221
221
def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor"""
    cache = {}

    def _is_ancestor(anc, desc):
        # an ancestor always has a smaller revision number, so these two
        # cheap checks avoid both the cache and the expensive call
        if anc > desc:
            return False
        if anc == desc:
            return True
        key = (anc, desc)
        cached = cache.get(key)
        if cached is None:
            cached = cache[key] = is_ancestor(anc, desc)
        return cached

    return _is_ancestor
238
238
239
239
def _changesetforwardcopies(a, b, match):
    if a.rev() in (nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev

    # To track a rename from "A" to "B", we need to gather all parent -> child
    # edges that are contained in `::B` but not in `::A`.
    #
    # To do so, we need to gather all revisions exclusive[1] to "B" (i.e.
    # `::b - ::a`) and also all the "root points", i.e. the parents of the
    # exclusive set that belong to ::a. These are exactly all the revisions
    # needed to express the parent -> child edges we need to combine.
    #
    # [1] actually, we need to gather all the edges within `(::a)::b`, i.e.
    # excluding paths that lead to roots that are not ancestors of `a`. We
    # keep this out of the explanation because it is hard enough without this
    # special case.

    parents = cl._uncheckedparentrevs
    graph_roots = (nullrev, nullrev)

    ancestors = cl.ancestors([a.rev()], inclusive=True)
    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    roots = set()
    has_graph_roots = False

    # iterate over `only(B, A)`
    for r in revs:
        ps = parents(r)
        if ps == graph_roots:
            has_graph_roots = True
        else:
            p1, p2 = ps

            # find all the "root points" (see larger comment above)
            if p1 != nullrev and p1 in ancestors:
                roots.add(p1)
            if p2 != nullrev and p2 in ancestors:
                roots.add(p2)
    if not roots:
        # no common revision to track copies from
        return {}
    if has_graph_roots:
        # this deals with the special case mentioned in the [1] footnote. We
        # must filter out revisions that lead to non-common graph roots.
        roots = list(roots)
        m = min(roots)
        h = [b.rev()]
        roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
        roots_to_head = set(roots_to_head)
        revs = [r for r in revs if r in roots_to_head]

    if repo.filecopiesmode == b'changeset-sidedata':
        # When using side-data, we will process the edges "from" the children.
        # We iterate over the children, gathering previously collected data
        # for the parents. To know when the parents' data is no longer
        # necessary, we keep a counter of how many children each revision has.
        #
        # An interesting property of `children_count` is that it only contains
        # revisions that will be relevant for an edge of the graph. So if a
        # child has a parent not in `children_count`, that edge should not be
        # processed.
        children_count = dict((r, 0) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children_count[r] = 0
                if p in children_count:
                    children_count[p] += 1
        revinfo = _revinfo_getter(repo, match)
        return _combine_changeset_copies(
            revs, children_count, b.rev(), revinfo, match, isancestor
        )
    else:
        # When not using side-data, we will process the edges "from" the
        # parent, so we need a full mapping of the parent -> children relation.
        children = dict((r, []) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children[r] = []
                if p in children:
                    children[p].append(r)
        x = revs.pop()
        assert x == b.rev()
        revs.extend(roots)
        revs.sort()

        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )
339
339
340
340
def _combine_changeset_copies(
    revs, children_count, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children_count: a {parent: <number-of-relevant-children>} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    if rustmod is not None:
        final_copies = rustmod.combine_changeset_copies(
            list(revs), children_count, targetrev, revinfo, isancestor
        )
    else:
        isancestor = cached_is_ancestor(isancestor)

        all_copies = {}
        # iterate over all the "children" side of copy tracing "edge"
        for current_rev in revs:
            p1, p2, changes = revinfo(current_rev)
            current_copies = None
            # iterate over all parents to chain the existing data with the
            # data from the parent -> child edge.
            for parent, parent_rev in ((1, p1), (2, p2)):
                if parent_rev == nullrev:
                    continue
                remaining_children = children_count.get(parent_rev)
                if remaining_children is None:
                    continue
                remaining_children -= 1
                children_count[parent_rev] = remaining_children
                if remaining_children:
                    copies = all_copies.get(parent_rev, None)
                else:
                    # last child of that parent: its data can be released
                    copies = all_copies.pop(parent_rev, None)

                if copies is None:
                    # this is a root
                    copies = {}

                newcopies = copies
                # chain the data in the edge with the existing data
                if changes is not None:
                    childcopies = {}
                    if parent == 1:
                        childcopies = changes.copied_from_p1
                    elif parent == 2:
                        childcopies = changes.copied_from_p2

                    if childcopies:
                        newcopies = copies.copy()
                        for dest, source in pycompat.iteritems(childcopies):
                            prev = copies.get(source)
                            if prev is not None and prev[1] is not None:
                                source = prev[1]
                            newcopies[dest] = (current_rev, source)
                        assert newcopies is not copies
                    if changes.removed:
                        if newcopies is copies:
                            newcopies = copies.copy()
                        for f in changes.removed:
                            if f in newcopies:
                                if newcopies is copies:
                                    # copy on write to avoid affecting potential other
                                    # branches. when there are no other branches, this
                                    # could be avoided.
                                    newcopies = copies.copy()
                                newcopies[f] = (current_rev, None)
                # check potential need to combine the data from another parent (for
                # that child). See comment below for details.
                if current_copies is None:
                    current_copies = newcopies
                elif current_copies is newcopies:
                    # nothing to merge:
                    pass
                else:
                    # we are the second parent to work on c, we need to merge our
                    # work with the other.
                    #
                    # In case of conflict, parent 1 take precedence over parent 2.
                    # This is an arbitrary choice made anew when implementing
                    # changeset based copies. It was made without regards with
                    # potential filelog related behavior.
                    assert parent == 2
                    current_copies = _merge_copies_dict(
                        newcopies, current_copies, isancestor, changes
                    )
            all_copies[current_rev] = current_copies

        # filter out internal details and return a {dest: source mapping}
        final_copies = {}
        for dest, (tt, source) in all_copies[targetrev].items():
            if source is not None:
                final_copies[dest] = source
    if not alwaysmatch:
        for filename in list(final_copies.keys()):
            if not match(filename):
                del final_copies[filename]
    return final_copies
447
447
448
448
# constants to decide which side to pick with _merge_copies_dict
PICK_MINOR = 0  # keep the value already present in the "minor" mapping
PICK_MAJOR = 1  # take the value from the "major" mapping
PICK_EITHER = 2  # both values are equivalent; either one is fine
453
453
454
454
def _merge_copies_dict(minor, major, isancestor, changes):
    """merge two copies-mapping together, minor and major

    In case of conflict, value from "major" will be picked.

    - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
      ancestors of `high_rev`,

    - `ismerged(path)`: callable return True if `path` have been merged in the
      current revision,

    return the resulting dict (in practice, the "minor" object, updated)
    """
    for dest, major_value in major.items():
        minor_value = minor.get(dest)
        if minor_value is None:
            # no conflict: simply adopt the major entry
            minor[dest] = major_value
            continue
        choice = _compare_values(changes, isancestor, dest, minor_value, major_value)
        if choice == PICK_MAJOR:
            minor[dest] = major_value
    return minor
477
477
478
478
def _compare_values(changes, isancestor, dest, minor, major):
    """compare two value within a _merge_copies_dict loop iteration

    Returns one of PICK_MINOR, PICK_MAJOR or PICK_EITHER.
    """
    major_tt, major_value = major
    minor_tt, minor_value = minor

    # evacuate some simple case first:
    if major_tt == minor_tt:
        # if it comes from the same revision it must be the same value
        assert major_value == minor_value
        return PICK_EITHER
    elif major[1] == minor[1]:
        return PICK_EITHER

    # actual merging needed: content from "major" wins, unless it is older than
    # the branch point or there is a merge
    elif changes is not None and major[1] is None and dest in changes.salvaged:
        return PICK_MINOR
    elif changes is not None and minor[1] is None and dest in changes.salvaged:
        return PICK_MAJOR
    elif changes is not None and dest in changes.merged:
        return PICK_MAJOR
    elif not isancestor(major_tt, minor_tt):
        if major[1] is not None:
            return PICK_MAJOR
    elif isancestor(minor_tt, major_tt):
        return PICK_MAJOR
    return PICK_MINOR
502
506
503
507
def _revinfo_getter_extra(repo):
    """return a function that return multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            filelog_parents = fctx._filelog.parents(fctx._filenode)
            # merged means the filelog entry has two real (non-null) parents
            real_parents = sum(1 for n in filelog_parents if n != nullid)
            return real_parents >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo
541
545
542
546
def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information"""
    # mapping of: revision -> {dest: (introducing_rev, source-or-None)}
    # entries are popped as soon as the revision has been processed as a
    # parent (each rev in `revs` is visited exactly once).
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            # pick the copy data recorded against the parent (`r`) we are
            # currently walking through; `r` must be one of `c`'s parents
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            # `newcopies` aliases `copies` until a write forces a copy
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    # chain through a rename recorded for `source` itself,
                    # unless that chain was invalidated (None source)
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    # a None source marks the copy as invalidated by removal
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    # merge result lands in `othercopies`, already stored
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                    all_copies[c] = newcopies

    # drop entries whose copy chain was invalidated by a removal
    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
611
615
612
616
def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
    """version of `_merge_copies_dict` that works with the Google
    specific "extra" based storage for copy information

    Merges `major` into `minor` in place; on conflict, `major` wins unless
    it is older than the branch point and the file was not merged.
    """
    for dest, major_entry in major.items():
        minor_entry = minor.get(dest)
        if minor_entry is None:
            # `minor` has no opinion on this file: adopt `major`'s entry
            minor[dest] = major_entry
            continue
        if major_entry[1] == minor_entry[1]:
            # identical copy source on both sides, nothing to arbitrate
            continue
        major_tt = major_entry[0]
        minor_tt = minor_entry[0]
        # content from "major" wins, unless it is older
        # than the branch point or there is a merge
        if (
            major_tt == minor_tt
            or not isancestor(major_tt, minor_tt)
            or ismerged(dest)
        ):
            minor[dest] = major_entry
633
637
634
638
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        # both endpoints are committed changesets
        return _committedforwardcopies(a, b, base, match)
    # `b` is the working copy: trace committed copies up to its first
    # parent, then layer the dirstate (uncommitted) copies on top
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
649
653
650
654
def _backwardrenames(a, b, match):
    """reverse the forward copies between `b` and its ancestor `a`,
    keeping only true renames (entries whose source is gone from `a`)"""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    backward = {}
    for dst, src in sorted(pycompat.iteritems(forward)):
        if match and not match(src):
            continue
        if src in a:
            # still present in `a`: this was a copy, not a rename -- drop it
            continue
        backward[src] = dst
    return backward
671
675
672
676
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        # identical or empty endpoints: no copies can be involved
        return {}
    if y.rev() is None and x == y.p1():
        # short-circuit to avoid issues with merge states
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        return _dirstatecopies(repo, match)
    ancestor = y.ancestor(x)
    if ancestor == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif ancestor == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # endpoints are on different branches: walk back from `x` to the
        # common ancestor, then forward from the ancestor to `y`
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = x if ancestor.rev() != nullrev else None
        copies = _chain(
            _backwardrenames(x, ancestor, match=match),
            _forwardcopies(ancestor, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
709
713
710
714
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        # the dirstate already records the copies for this simple case
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
779
783
780
784
def _isfullcopytraceable(repo, c1, base):
    """Decide whether full copytracing is affordable under 'heuristics' mode.

    Full copytracing is worthwhile when base, source and destination are all
    non-public (mutable) and the number of changesets from base to c1 stays
    below `experimental.copytrace.sourcecommitlimit`; past that limit the
    cheaper heuristic algorithm is used instead.
    """
    if c1.rev() is None:
        # working directory: evaluate against its first parent
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        # a public endpoint can make the range arbitrarily large
        return False
    sourcecommitlimit = repo.ui.configint(
        b'experimental', b'copytrace.sourcecommitlimit'
    )
    commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
    return commits < sourcecommitlimit
799
803
800
804
def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    """Record the effect of a copy/rename of `src` seen only on side 1.

    Updates `copy` and `renamedelete` in place; `m1`/`m2`/`mb` are the
    manifests of side 1, side 2 and the merge base respectively.
    """
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
        return
    if src not in mb:
        # Work around the "short-circuit to avoid issues with merge states"
        # thing in pathcopies(): pathcopies(x, y) can return a copy where the
        # destination doesn't exist in y.
        return
    if mb[src] != m2[src] and not _related(c2[src], base[src]):
        # content diverged in an unrelated way: not a copy we care about
        return
    if mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
        # modified on side 2
        for dst in dsts1:
            copy[dst] = src
820
824
821
825
class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # `None` sentinels give each instance a fresh empty dict, avoiding
        # the shared-mutable-default-argument pitfall.
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        template = '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>'
        fields = (self.copy, self.renamedelete, self.dirmove, self.movewithdir)
        return template % fields
856
860
857
861
def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        # no copies on either side: nothing to merge, diverge or rename-delete
        return branch_copies(), branch_copies(), {}

    # invert {dst: src} into {src: [dst, ...]} so each copy source can be
    # classified by what happened to it on both sides
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    # (computed lazily and memoized, since filesnotin() is expensive and the
    # result is only needed for debug output and _dir_renames)
    cache = []

    def _get_addedfiles(idx):
        if not cache:
            addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
            addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
            u1 = sorted(addedinm1 - addedinm2)
            u2 = sorted(addedinm2 - addedinm1)
            cache.extend((u1, u2))
        return cache[idx]

    u1fn = lambda: _get_addedfiles(0)
    u2fn = lambda: _get_addedfiles(1)
    if repo.ui.debugflag:
        u1 = u1fn()
        u2 = u2fn()

        header = b"  unmatched files in %s"
        if u1:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1))
            )
        if u2:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2))
            )

        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b"   on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b"    src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge
996
1000
997
1001
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Detect wholesale directory moves and files dragged along with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfilesfn: function returning added files on the other side (compared
    to ctx)

    Returns a pair (dirmove, movewithdir): dirmove maps a source directory
    (with trailing slash) to its destination directory, and movewithdir maps
    files added on the other side to the path they should move to.
    """
    # Collect candidate directory moves, remembering which source
    # directories have been ruled out.  A directory only counts as moved
    # when every copy out of it targets the same destination directory and
    # the source directory is gone from ctx.
    ruledout = set()
    dirmove = {}

    for dst, src in pycompat.iteritems(fullcopy):
        dsrc = pathutil.dirname(src)
        ddst = pathutil.dirname(dst)
        if dsrc in ruledout:
            # already seen to be uninteresting
            continue
        if ctx.hasdir(dsrc) and ctx.hasdir(ddst):
            # directory wasn't entirely moved locally
            ruledout.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            ruledout.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for bad in ruledout:
        dirmove.pop(bad, None)
    del ruledout

    if not dirmove:
        return {}, {}

    # normalize to trailing-slash form so the prefix matching below cannot
    # accidentally match a sibling directory with a common name prefix
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    # relocate files newly added (on the other side) inside moved directories
    movewithdir = {}
    for f in addedfilesfn():
        if f in fullcopy:
            continue
        for d in dirmove:
            if not f.startswith(d):
                continue
            # new file added in a directory that was moved, move it
            df = dirmove[d] + f[len(d) :]
            if df not in copy:
                movewithdir[f] = df
                repo.ui.debug(
                    b" pending file src: '%s' -> dst: '%s'\n" % (f, df)
                )
            break

    return dirmove, movewithdir
1061
1065
1062
1066
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # operate on committed revisions only: a working context has rev() of
    # None, so substitute its first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # walk the first-parent chain from c2 back to base, collecting every
    # file touched on the way; any merge commit makes the walk ambiguous
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # copies made on the c2 side whose source still exists in c1
    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    copies1 = {}
    if missingfiles:
        # index files added on the c1 side by basename and by dirname, so
        # each missing file is only compared against plausible rename
        # candidates (heuristics 1 and 2 from the docstring)
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1170
1174
1171
1175
1172 def _related(f1, f2):
1176 def _related(f1, f2):
1173 """return True if f1 and f2 filectx have a common ancestor
1177 """return True if f1 and f2 filectx have a common ancestor
1174
1178
1175 Walk back to common ancestor to see if the two files originate
1179 Walk back to common ancestor to see if the two files originate
1176 from the same file. Since workingfilectx's rev() is None it messes
1180 from the same file. Since workingfilectx's rev() is None it messes
1177 up the integer comparison logic, hence the pre-step check for
1181 up the integer comparison logic, hence the pre-step check for
1178 None (f1 and f2 can only be workingfilectx's initially).
1182 None (f1 and f2 can only be workingfilectx's initially).
1179 """
1183 """
1180
1184
1181 if f1 == f2:
1185 if f1 == f2:
1182 return True # a match
1186 return True # a match
1183
1187
1184 g1, g2 = f1.ancestors(), f2.ancestors()
1188 g1, g2 = f1.ancestors(), f2.ancestors()
1185 try:
1189 try:
1186 f1r, f2r = f1.linkrev(), f2.linkrev()
1190 f1r, f2r = f1.linkrev(), f2.linkrev()
1187
1191
1188 if f1r is None:
1192 if f1r is None:
1189 f1 = next(g1)
1193 f1 = next(g1)
1190 if f2r is None:
1194 if f2r is None:
1191 f2 = next(g2)
1195 f2 = next(g2)
1192
1196
1193 while True:
1197 while True:
1194 f1r, f2r = f1.linkrev(), f2.linkrev()
1198 f1r, f2r = f1.linkrev(), f2.linkrev()
1195 if f1r > f2r:
1199 if f1r > f2r:
1196 f1 = next(g1)
1200 f1 = next(g1)
1197 elif f2r > f1r:
1201 elif f2r > f1r:
1198 f2 = next(g2)
1202 f2 = next(g2)
1199 else: # f1 and f2 point to files in the same linkrev
1203 else: # f1 and f2 point to files in the same linkrev
1200 return f1 == f2 # true if they point to the same file
1204 return f1 == f2 # true if they point to the same file
1201 except StopIteration:
1205 except StopIteration:
1202 return False
1206 return False
1203
1207
1204
1208
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), only copies between base and ctx are considered
    here; copies between base and wctx are ignored.  Also unlike
    mergecopies(), the copies are applied to the working copy (by marking
    destination files as copied) rather than merely reported.  That makes
    this cheaper (especially in the common case of base==ctx.p1()) and
    useful even when experimental.copytrace=off.

    merge.update() will have already marked most copies, but it only marks
    a copy when it thinks the source files are related (see
    merge._related()) and when the file was modified on the local side.
    This function records the copies merge.update() "missed".
    """
    found = pathcopies(base, ctx)
    # drop copies that are invalid when chained onto the working copy's
    # first parent
    _filter(wctx.p1(), wctx, found)
    for dst, src in pycompat.iteritems(found):
        wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now