upstream/mercurial-mirror Commit - r46777:6b9d6529

1

# coding: utf8

1

# coding: utf8

2

# copies.py - copy detection for Mercurial

2

# copies.py - copy detection for Mercurial

3

#

3

#

4

5

#

5

#

6

# This software may be used and distributed according to the terms of the

6

# This software may be used and distributed according to the terms of the

7

# GNU General Public License version 2 or any later version.

7

# GNU General Public License version 2 or any later version.

8

9

from __future__ import absolute_import

9

from __future__ import absolute_import

10

11

import collections

11

import collections

12

import os

12

import os

13

14

from .i18n import _

14

from .i18n import _

15

from .node import (

15

from .node import (

16

nullid,

16

nullid,

17

nullrev,

17

nullrev,

18

)

18

)

19

20

from . import (

20

from . import (

21

match as matchmod,

21

match as matchmod,

22

pathutil,

22

pathutil,

23

policy,

23

policy,

24

pycompat,

24

pycompat,

25

util,

25

util,

26

)

26

)

27

28

29

from .utils import stringutil

29

from .utils import stringutil

30

31

from .revlogutils import (

31

from .revlogutils import (

32

flagutil,

32

flagutil,

33

sidedata as sidedatamod,

33

sidedata as sidedatamod,

34

)

34

)

35

36

rustmod = policy.importrust("copy_tracing")

36

rustmod = policy.importrust("copy_tracing")

37

38

39

def _filter(src, dst, t):

39

def _filter(src, dst, t):

40

"""filters out invalid copies after chaining"""

40

"""filters out invalid copies after chaining"""

41

42

# When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')

42

# When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')

43

# with copies in 'b' (from 'mid' to 'dst'), we can get the different cases

43

# with copies in 'b' (from 'mid' to 'dst'), we can get the different cases

44

# in the following table (not including trivial cases). For example, case 2

44

# in the following table (not including trivial cases). For example, case 2

45

# is where a file existed in 'src' and remained under that name in 'mid' and

45

# is where a file existed in 'src' and remained under that name in 'mid' and

46

# then was renamed between 'mid' and 'dst'.

46

# then was renamed between 'mid' and 'dst'.

47

#

47

#

48

# case src mid dst result

48

# case src mid dst result

49

# 1 x y - -

49

# 1 x y - -

50

# 2 x y y x->y

50

# 2 x y y x->y

51

# 3 x y x -

51

# 3 x y x -

52

# 4 x y z x->z

52

# 4 x y z x->z

53

# 5 - x y -

53

# 5 - x y -

54

# 6 x x y x->y

54

# 6 x x y x->y

55

#

55

#

56

# _chain() takes care of chaining the copies in 'a' and 'b', but it

56

# _chain() takes care of chaining the copies in 'a' and 'b', but it

57

# cannot tell the difference between cases 1 and 2, between 3 and 4, or

57

# cannot tell the difference between cases 1 and 2, between 3 and 4, or

58

# between 5 and 6, so it includes all cases in its result.

58

# between 5 and 6, so it includes all cases in its result.

59

# Cases 1, 3, and 5 are then removed by _filter().

59

# Cases 1, 3, and 5 are then removed by _filter().

60

61

for k, v in list(t.items()):

61

for k, v in list(t.items()):

62

# remove copies from files that didn't exist

62

# remove copies from files that didn't exist

63

if v not in src:

63

if v not in src:

64

del t[k]

64

del t[k]

65

# remove criss-crossed copies

65

# remove criss-crossed copies

66

elif k in src and v in dst:

66

elif k in src and v in dst:

67

del t[k]

67

del t[k]

68

# remove copies to files that were then removed

68

# remove copies to files that were then removed

69

elif k not in dst:

69

elif k not in dst:

70

del t[k]

70

del t[k]

71

72

73

def _chain(prefix, suffix):

73

def _chain(prefix, suffix):

74

"""chain two sets of copies 'prefix' and 'suffix'"""

74

"""chain two sets of copies 'prefix' and 'suffix'"""

75

result = prefix.copy()

75

result = prefix.copy()

76

for key, value in pycompat.iteritems(suffix):

76

for key, value in pycompat.iteritems(suffix):

77

result[key] = prefix.get(value, value)

77

result[key] = prefix.get(value, value)

78

return result

78

return result

79

80

81

def _tracefile(fctx, am, basemf):

81

def _tracefile(fctx, am, basemf):

82

"""return file context that is the ancestor of fctx present in ancestor

82

"""return file context that is the ancestor of fctx present in ancestor

83

manifest am

83

manifest am

84

85

Note: we used to try and stop after a given limit, however checking if that

85

Note: we used to try and stop after a given limit, however checking if that

86

limit is reached turned out to be very expensive. we are better off

86

limit is reached turned out to be very expensive. we are better off

87

disabling that feature."""

87

disabling that feature."""

88

89

for f in fctx.ancestors():

89

for f in fctx.ancestors():

90

path = f.path()

90

path = f.path()

91

if am.get(path, None) == f.filenode():

91

if am.get(path, None) == f.filenode():

92

return path

92

return path

93

if basemf and basemf.get(path, None) == f.filenode():

93

if basemf and basemf.get(path, None) == f.filenode():

94

return path

94

return path

95

96

97

def _dirstatecopies(repo, match=None):

97

def _dirstatecopies(repo, match=None):

98

ds = repo.dirstate

98

ds = repo.dirstate

99

c = ds.copies().copy()

99

c = ds.copies().copy()

100

for k in list(c):

100

for k in list(c):

101

if ds[k] not in b'anm' or (match and not match(k)):

101

if ds[k] not in b'anm' or (match and not match(k)):

102

del c[k]

102

del c[k]

103

return c

103

return c

104

105

106

def _computeforwardmissing(a, b, match=None):

106

def _computeforwardmissing(a, b, match=None):

107

"""Computes which files are in b but not a.

107

"""Computes which files are in b but not a.

108

This is its own function so extensions can easily wrap this call to see what

108

This is its own function so extensions can easily wrap this call to see what

109

files _forwardcopies is about to process.

109

files _forwardcopies is about to process.

110

"""

110

"""

111

ma = a.manifest()

111

ma = a.manifest()

112

mb = b.manifest()

112

mb = b.manifest()

113

return mb.filesnotin(ma, match=match)

113

return mb.filesnotin(ma, match=match)

114

115

116

def usechangesetcentricalgo(repo):

116

def usechangesetcentricalgo(repo):

117

"""Checks if we should use changeset-centric copy algorithms"""

117

"""Checks if we should use changeset-centric copy algorithms"""

118

if repo.filecopiesmode == b'changeset-sidedata':

118

if repo.filecopiesmode == b'changeset-sidedata':

119

return True

119

return True

120

readfrom = repo.ui.config(b'experimental', b'copies.read-from')

120

readfrom = repo.ui.config(b'experimental', b'copies.read-from')

121

changesetsource = (b'changeset-only', b'compatibility')

121

changesetsource = (b'changeset-only', b'compatibility')

122

return readfrom in changesetsource

122

return readfrom in changesetsource

123

124

125

def _committedforwardcopies(a, b, base, match):

125

def _committedforwardcopies(a, b, base, match):

126

"""Like _forwardcopies(), but b.rev() cannot be None (working copy)"""

126

"""Like _forwardcopies(), but b.rev() cannot be None (working copy)"""

127

# files might have to be traced back to the fctx parent of the last

127

# files might have to be traced back to the fctx parent of the last

128

# one-side-only changeset, but not further back than that

128

# one-side-only changeset, but not further back than that

129

repo = a._repo

129

repo = a._repo

130

131

if usechangesetcentricalgo(repo):

131

if usechangesetcentricalgo(repo):

132

return _changesetforwardcopies(a, b, match)

132

return _changesetforwardcopies(a, b, match)

133

134

debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

134

debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

135

dbg = repo.ui.debug

135

dbg = repo.ui.debug

136

if debug:

136

if debug:

137

dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))

137

dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))

138

am = a.manifest()

138

am = a.manifest()

139

basemf = None if base is None else base.manifest()

139

basemf = None if base is None else base.manifest()

140

141

# find where new files came from

141

# find where new files came from

142

# we currently don't try to find where old files went, too expensive

142

# we currently don't try to find where old files went, too expensive

143

# this means we can miss a case like 'hg rm b; hg cp a b'

143

# this means we can miss a case like 'hg rm b; hg cp a b'

144

cm = {}

144

cm = {}

145

146

# Computing the forward missing is quite expensive on large manifests, since

146

# Computing the forward missing is quite expensive on large manifests, since

147

# it compares the entire manifests. We can optimize it in the common use

147

# it compares the entire manifests. We can optimize it in the common use

148

# case of computing what copies are in a commit versus its parent (like

148

# case of computing what copies are in a commit versus its parent (like

149

# during a rebase or histedit). Note, we exclude merge commits from this

149

# during a rebase or histedit). Note, we exclude merge commits from this

150

# optimization, since the ctx.files() for a merge commit is not correct for

150

# optimization, since the ctx.files() for a merge commit is not correct for

151

# this comparison.

151

# this comparison.

152

forwardmissingmatch = match

152

forwardmissingmatch = match

153

if b.p1() == a and b.p2().node() == nullid:

153

if b.p1() == a and b.p2().node() == nullid:

154

filesmatcher = matchmod.exact(b.files())

154

filesmatcher = matchmod.exact(b.files())

155

forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)

155

forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)

156

missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

156

missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

157

158

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

158

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

159

160

if debug:

160

if debug:

161

dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

161

dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

162

163

for f in sorted(missing):

163

for f in sorted(missing):

164

if debug:

164

if debug:

165

dbg(b'debug.copies: tracing file: %s\n' % f)

165

dbg(b'debug.copies: tracing file: %s\n' % f)

166

fctx = b[f]

166

fctx = b[f]

167

fctx._ancestrycontext = ancestrycontext

167

fctx._ancestrycontext = ancestrycontext

168

169

if debug:

169

if debug:

170

start = util.timer()

170

start = util.timer()

171

opath = _tracefile(fctx, am, basemf)

171

opath = _tracefile(fctx, am, basemf)

172

if opath:

172

if opath:

173

if debug:

173

if debug:

174

dbg(b'debug.copies: rename of: %s\n' % opath)

174

dbg(b'debug.copies: rename of: %s\n' % opath)

175

cm[f] = opath

175

cm[f] = opath

176

if debug:

176

if debug:

177

dbg(

177

dbg(

178

b'debug.copies: time: %f seconds\n'

178

b'debug.copies: time: %f seconds\n'

179

% (util.timer() - start)

179

% (util.timer() - start)

180

)

180

)

181

return cm

181

return cm

182

183

184

def _revinfo_getter(repo, match):

184

def _revinfo_getter(repo, match):

185

"""returns a function that returns the following data given a <rev>"

185

"""returns a function that returns the following data given a <rev>"

186

187

* p1: revision number of first parent

187

* p1: revision number of first parent

188

* p2: revision number of first parent

188

* p2: revision number of first parent

189

* changes: a ChangingFiles object

189

* changes: a ChangingFiles object

190

"""

190

"""

191

cl = repo.changelog

191

cl = repo.changelog

192

parents = cl.parentrevs

192

parents = cl.parentrevs

193

flags = cl.flags

193

flags = cl.flags

194

195

HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

195

HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

196

197

changelogrevision = cl.changelogrevision

197

changelogrevision = cl.changelogrevision

198

199

if rustmod is not None:

199

if rustmod is not None:

200

201

def revinfo(rev):

201

def revinfo(rev):

202

p1, p2 = parents(rev)

202

p1, p2 = parents(rev)

203

if flags(rev) & HASCOPIESINFO:

203

if flags(rev) & HASCOPIESINFO:

204

raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)

204

raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)

205

else:

205

else:

206

raw = None

206

raw = None

207

return (p1, p2, raw)

207

return (p1, p2, raw)

208

209

else:

209

else:

210

211

def revinfo(rev):

211

def revinfo(rev):

212

p1, p2 = parents(rev)

212

p1, p2 = parents(rev)

213

if flags(rev) & HASCOPIESINFO:

213

if flags(rev) & HASCOPIESINFO:

214

changes = changelogrevision(rev).changes

214

changes = changelogrevision(rev).changes

215

else:

215

else:

216

changes = None

216

changes = None

217

return (p1, p2, changes)

217

return (p1, p2, changes)

218

219

return revinfo

219

return revinfo

220

221

222

def cached_is_ancestor(is_ancestor):

222

def cached_is_ancestor(is_ancestor):

223

"""return a cached version of is_ancestor"""

223

"""return a cached version of is_ancestor"""

224

cache = {}

224

cache = {}

225

226

def _is_ancestor(anc, desc):

226

def _is_ancestor(anc, desc):

227

if anc > desc:

227

if anc > desc:

228

return False

228

return False

229

elif anc == desc:

229

elif anc == desc:

230

return True

230

return True

231

key = (anc, desc)

231

key = (anc, desc)

232

ret = cache.get(key)

232

ret = cache.get(key)

233

if ret is None:

233

if ret is None:

234

ret = cache[key] = is_ancestor(anc, desc)

234

ret = cache[key] = is_ancestor(anc, desc)

235

return ret

235

return ret

236

237

return _is_ancestor

237

return _is_ancestor

238

239

240

def _changesetforwardcopies(a, b, match):

240

def _changesetforwardcopies(a, b, match):

241

if a.rev() in (nullrev, b.rev()):

241

if a.rev() in (nullrev, b.rev()):

242

return {}

242

return {}

243

244

repo = a.repo().unfiltered()

244

repo = a.repo().unfiltered()

245

children = {}

245

children = {}

246

247

cl = repo.changelog

247

cl = repo.changelog

248

isancestor = cl.isancestorrev

248

isancestor = cl.isancestorrev

249

250

# To track rename from "A" to B, we need to gather all parent → children

250

# To track rename from "A" to B, we need to gather all parent → children

251

# edges that are contains in `::B` but not in `::A`.

251

# edges that are contains in `::B` but not in `::A`.

252

#

252

#

253

#

253

#

254

# To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -

254

# To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -

255

# ::a`) and also all the "roots point", ie the parents of the exclusive set

255

# ::a`) and also all the "roots point", ie the parents of the exclusive set

256

# that belong to ::a. These are exactly all the revisions needed to express

256

# that belong to ::a. These are exactly all the revisions needed to express

257

# the parent → children we need to combine.

257

# the parent → children we need to combine.

258

#

258

#

259

# [1] actually, we need to gather all the edges within `(::a)::b`, ie:

259

# [1] actually, we need to gather all the edges within `(::a)::b`, ie:

260

# excluding paths that leads to roots that are not ancestors of `a`. We

260

# excluding paths that leads to roots that are not ancestors of `a`. We

261

# keep this out of the explanation because it is hard enough without this special case..

261

# keep this out of the explanation because it is hard enough without this special case..

262

263

parents = cl._uncheckedparentrevs

263

parents = cl._uncheckedparentrevs

264

graph_roots = (nullrev, nullrev)

264

graph_roots = (nullrev, nullrev)

265

266

ancestors = cl.ancestors([a.rev()], inclusive=True)

266

ancestors = cl.ancestors([a.rev()], inclusive=True)

267

revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])

267

revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])

268

roots = set()

268

roots = set()

269

has_graph_roots = False

269

has_graph_roots = False

270

271

# iterate over `only(B, A)`

271

# iterate over `only(B, A)`

272

for r in revs:

272

for r in revs:

273

ps = parents(r)

273

ps = parents(r)

274

if ps == graph_roots:

274

if ps == graph_roots:

275

has_graph_roots = True

275

has_graph_roots = True

276

else:

276

else:

277

p1, p2 = ps

277

p1, p2 = ps

278

279

# find all the "root points" (see larger comment above)

279

# find all the "root points" (see larger comment above)

280

if p1 != nullrev and p1 in ancestors:

280

if p1 != nullrev and p1 in ancestors:

281

roots.add(p1)

281

roots.add(p1)

282

if p2 != nullrev and p2 in ancestors:

282

if p2 != nullrev and p2 in ancestors:

283

roots.add(p2)

283

roots.add(p2)

284

if not roots:

284

if not roots:

285

# no common revision to track copies from

285

# no common revision to track copies from

286

return {}

286

return {}

287

if has_graph_roots:

287

if has_graph_roots:

288

# this deal with the special case mentionned in the [1] footnotes. We

288

# this deal with the special case mentionned in the [1] footnotes. We

289

# must filter out revisions that leads to non-common graphroots.

289

# must filter out revisions that leads to non-common graphroots.

290

roots = list(roots)

290

roots = list(roots)

291

m = min(roots)

291

m = min(roots)

292

h = [b.rev()]

292

h = [b.rev()]

293

roots_to_head = cl.reachableroots(m, h, roots, includepath=True)

293

roots_to_head = cl.reachableroots(m, h, roots, includepath=True)

294

roots_to_head = set(roots_to_head)

294

roots_to_head = set(roots_to_head)

295

revs = [r for r in revs if r in roots_to_head]

295

revs = [r for r in revs if r in roots_to_head]

296

297

if repo.filecopiesmode == b'changeset-sidedata':

297

if repo.filecopiesmode == b'changeset-sidedata':

298

# When using side-data, we will process the edges "from" the children.

298

# When using side-data, we will process the edges "from" the children.

299

# We iterate over the childre, gathering previous collected data for

299

# We iterate over the childre, gathering previous collected data for

300

# the parents. Do know when the parents data is no longer necessary, we

300

# the parents. Do know when the parents data is no longer necessary, we

301

# keep a counter of how many children each revision has.

301

# keep a counter of how many children each revision has.

302

#

302

#

303

# An interresting property of `children_count` is that it only contains

303

# An interresting property of `children_count` is that it only contains

304

# revision that will be relevant for a edge of the graph. So if a

304

# revision that will be relevant for a edge of the graph. So if a

305

# children has parent not in `children_count`, that edges should not be

305

# children has parent not in `children_count`, that edges should not be

306

# processed.

306

# processed.

307

children_count = dict((r, 0) for r in roots)

307

children_count = dict((r, 0) for r in roots)

308

for r in revs:

308

for r in revs:

309

for p in cl.parentrevs(r):

309

for p in cl.parentrevs(r):

310

if p == nullrev:

310

if p == nullrev:

311

continue

311

continue

312

children_count[r] = 0

312

children_count[r] = 0

313

if p in children_count:

313

if p in children_count:

314

children_count[p] += 1

314

children_count[p] += 1

315

revinfo = _revinfo_getter(repo, match)

315

revinfo = _revinfo_getter(repo, match)

316

return _combine_changeset_copies(

316

return _combine_changeset_copies(

317

revs, children_count, b.rev(), revinfo, match, isancestor

317

revs, children_count, b.rev(), revinfo, match, isancestor

318

)

318

)

319

else:

319

else:

320

# When not using side-data, we will process the edges "from" the parent.

320

# When not using side-data, we will process the edges "from" the parent.

321

# so we need a full mapping of the parent -> children relation.

321

# so we need a full mapping of the parent -> children relation.

322

children = dict((r, []) for r in roots)

322

children = dict((r, []) for r in roots)

323

for r in revs:

323

for r in revs:

324

for p in cl.parentrevs(r):

324

for p in cl.parentrevs(r):

325

if p == nullrev:

325

if p == nullrev:

326

continue

326

continue

327

children[r] = []

327

children[r] = []

328

if p in children:

328

if p in children:

329

children[p].append(r)

329

children[p].append(r)

330

x = revs.pop()

330

x = revs.pop()

331

assert x == b.rev()

331

assert x == b.rev()

332

revs.extend(roots)

332

revs.extend(roots)

333

revs.sort()

333

revs.sort()

334

335

revinfo = _revinfo_getter_extra(repo)

335

revinfo = _revinfo_getter_extra(repo)

336

return _combine_changeset_copies_extra(

336

return _combine_changeset_copies_extra(

337

revs, children, b.rev(), revinfo, match, isancestor

337

revs, children, b.rev(), revinfo, match, isancestor

338

)

338

)

339

340

341

def _combine_changeset_copies(

341

def _combine_changeset_copies(

342

revs, children_count, targetrev, revinfo, match, isancestor

342

revs, children_count, targetrev, revinfo, match, isancestor

343

):

343

):

344

"""combine the copies information for each item of iterrevs

344

"""combine the copies information for each item of iterrevs

345

346

revs: sorted iterable of revision to visit

346

revs: sorted iterable of revision to visit

347

children_count: a {parent: <number-of-relevant-children>} mapping.

347

children_count: a {parent: <number-of-relevant-children>} mapping.

348

targetrev: the final copies destination revision (not in iterrevs)

348

targetrev: the final copies destination revision (not in iterrevs)

349

revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)

349

revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)

350

match: a matcher

350

match: a matcher

351

352

It returns the aggregated copies information for `targetrev`.

352

It returns the aggregated copies information for `targetrev`.

353

"""

353

"""

354

355

alwaysmatch = match.always()

355

alwaysmatch = match.always()

356

357

if rustmod is not None:

357

if rustmod is not None:

358

final_copies = rustmod.combine_changeset_copies(

358

final_copies = rustmod.combine_changeset_copies(

359

list(revs), children_count, targetrev, revinfo, isancestor

359

list(revs), children_count, targetrev, revinfo, isancestor

360

)

360

)

361

else:

361

else:

362

isancestor = cached_is_ancestor(isancestor)

362

isancestor = cached_is_ancestor(isancestor)

363

364

all_copies = {}

364

all_copies = {}

365

# iterate over all the "children" side of copy tracing "edge"

365

# iterate over all the "children" side of copy tracing "edge"

366

for current_rev in revs:

366

for current_rev in revs:

367

p1, p2, changes = revinfo(current_rev)

367

p1, p2, changes = revinfo(current_rev)

368

current_copies = None

368

current_copies = None

369

# iterate over all parents to chain the existing data with the

369

# iterate over all parents to chain the existing data with the

370

# data from the parent → child edge.

370

# data from the parent → child edge.

371

for parent, parent_rev in ((1, p1), (2, p2)):

371

for parent, parent_rev in ((1, p1), (2, p2)):

372

if parent_rev == nullrev:

372

if parent_rev == nullrev:

373

continue

373

continue

374

remaining_children = children_count.get(parent_rev)

374

remaining_children = children_count.get(parent_rev)

375

if remaining_children is None:

375

if remaining_children is None:

376

continue

376

continue

377

remaining_children -= 1

377

remaining_children -= 1

378

children_count[parent_rev] = remaining_children

378

children_count[parent_rev] = remaining_children

379

if remaining_children:

379

if remaining_children:

380

copies = all_copies.get(parent_rev, None)

380

copies = all_copies.get(parent_rev, None)

381

else:

381

else:

382

copies = all_copies.pop(parent_rev, None)

382

copies = all_copies.pop(parent_rev, None)

383

384

if copies is None:

384

if copies is None:

385

# this is a root

385

# this is a root

386

copies = {}

386

copies = {}

387

388

newcopies = copies

388

newcopies = copies

389

# chain the data in the edge with the existing data

389

# chain the data in the edge with the existing data

390

if changes is not None:

390

if changes is not None:

391

childcopies = {}

391

childcopies = {}

392

if parent == 1:

392

if parent == 1:

393

childcopies = changes.copied_from_p1

393

childcopies = changes.copied_from_p1

394

elif parent == 2:

394

elif parent == 2:

395

childcopies = changes.copied_from_p2

395

childcopies = changes.copied_from_p2

396

397

if childcopies:

397

if childcopies:

398

newcopies = copies.copy()

398

newcopies = copies.copy()

399

for dest, source in pycompat.iteritems(childcopies):

399

for dest, source in pycompat.iteritems(childcopies):

400

prev = copies.get(source)

400

prev = copies.get(source)

401

if prev is not None and prev[1] is not None:

401

if prev is not None and prev[1] is not None:

402

source = prev[1]

402

source = prev[1]

403

newcopies[dest] = (current_rev, source)

403

newcopies[dest] = (current_rev, source)

404

assert newcopies is not copies

404

assert newcopies is not copies

405

if changes.removed:

405

if changes.removed:

406

if newcopies is copies:

406

if newcopies is copies:

407

newcopies = copies.copy()

407

newcopies = copies.copy()

408

for f in changes.removed:

408

for f in changes.removed:

409

if f in newcopies:

409

if f in newcopies:

410

if newcopies is copies:

410

if newcopies is copies:

411

# copy on write to avoid affecting potential other

411

# copy on write to avoid affecting potential other

412

# branches. when there are no other branches, this

412

# branches. when there are no other branches, this

413

# could be avoided.

413

# could be avoided.

414

newcopies = copies.copy()

414

newcopies = copies.copy()

415

newcopies[f] = (current_rev, None)

415

newcopies[f] = (current_rev, None)

416

# check potential need to combine the data from another parent (for

416

# check potential need to combine the data from another parent (for

417

# that child). See comment below for details.

417

# that child). See comment below for details.

418

if current_copies is None:

418

if current_copies is None:

419

current_copies = newcopies

419

current_copies = newcopies

420

elif current_copies is newcopies:

420

elif current_copies is newcopies:

421

# nothing to merge:

421

# nothing to merge:

422

pass

422

pass

423

else:

423

else:

424

# we are the second parent to work on c, we need to merge our

424

# we are the second parent to work on c, we need to merge our

425

# work with the other.

425

# work with the other.

426

#

426

#

427

# In case of conflict, parent 1 take precedence over parent 2.

427

# In case of conflict, parent 1 take precedence over parent 2.

428

# This is an arbitrary choice made anew when implementing

428

# This is an arbitrary choice made anew when implementing

429

# changeset based copies. It was made without regards with

429

# changeset based copies. It was made without regards with

430

# potential filelog related behavior.

430

# potential filelog related behavior.

431

assert parent == 2

431

assert parent == 2

432

current_copies = _merge_copies_dict(

432

current_copies = _merge_copies_dict(

433

newcopies, current_copies, isancestor, changes

433

newcopies, current_copies, isancestor, changes

434

)

434

)

435

all_copies[current_rev] = current_copies

435

all_copies[current_rev] = current_copies

436

437

# filter out internal details and return a {dest: source mapping}

437

# filter out internal details and return a {dest: source mapping}

438

final_copies = {}

438

final_copies = {}

439

for dest, (tt, source) in all_copies[targetrev].items():

439

for dest, (tt, source) in all_copies[targetrev].items():

440

if source is not None:

440

if source is not None:

441

final_copies[dest] = source

441

final_copies[dest] = source

442

if not alwaysmatch:

442

if not alwaysmatch:

443

for filename in list(final_copies.keys()):

443

for filename in list(final_copies.keys()):

444

if not match(filename):

444

if not match(filename):

445

del final_copies[filename]

445

del final_copies[filename]

446

return final_copies

446

return final_copies

447

448

449

# constant to decide which side to pick with _merge_copies_dict

449

# constant to decide which side to pick with _merge_copies_dict

450

PICK_MINOR = 0

450

PICK_MINOR = 0

451

PICK_MAJOR = 1

451

PICK_MAJOR = 1

452

PICK_EITHER = 2

452

PICK_EITHER = 2

453

454

455

def _merge_copies_dict(minor, major, isancestor, changes):

455

def _merge_copies_dict(minor, major, isancestor, changes):

456

"""merge two copies-mapping together, minor and major

456

"""merge two copies-mapping together, minor and major

457

458

In case of conflict, value from "major" will be picked.

458

In case of conflict, value from "major" will be picked.

459

460

- `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an

460

- `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an

461

ancestors of `high_rev`,

461

ancestors of `high_rev`,

462

463

- `ismerged(path)`: callable return True if `path` have been merged in the

463

- `ismerged(path)`: callable return True if `path` have been merged in the

464

current revision,

464

current revision,

465

466

return the resulting dict (in practice, the "minor" object, updated)

466

return the resulting dict (in practice, the "minor" object, updated)

467

"""

467

"""

468

for dest, value in major.items():

468

for dest, value in major.items():

469

other = minor.get(dest)

469

other = minor.get(dest)

470

if other is None:

470

if other is None:

471

minor[dest] = value

471

minor[dest] = value

472

else:

472

else:

473

pick = _compare_values(changes, isancestor, dest, other, value)

473

pick = _compare_values(changes, isancestor, dest, other, value)

474

if pick == PICK_MAJOR:

474

if pick == PICK_MAJOR:

475

minor[dest] = value

475

minor[dest] = value

476

return minor

476

return minor

477

478

479

def _compare_values(changes, isancestor, dest, o~~the~~r, ~~value~~):

479

def _compare_values(changes, isancestor, dest, minor, major):

480

"""compare two value within a _merge_copies_dict loop iteration"""

480

"""compare two value within a _merge_copies_dict loop iteration"""

481

~~new~~_tt = ~~value~~[0]

481

major_tt = major[0]

482

o~~the~~r_tt = o~~the~~r[0]

482

minor_tt = minor[0]

483

484

if ~~value~~[1] == o~~the~~r[1]:

484

if major[1] == minor[1]:

485

return PICK_EITHER

485

return PICK_EITHER

486

# content from "major" wins, unless it is older

486

# content from "major" wins, unless it is older

487

# than the branch point or there is a merge

487

# than the branch point or there is a merge

488

if ~~new~~_tt == o~~the~~r_tt:

488

if major_tt == minor_tt:

489

return PICK_MAJOR

489

return PICK_MAJOR

490

elif changes is not None and ~~value~~[1] is None and dest in changes.salvaged:

490

elif changes is not None and major[1] is None and dest in changes.salvaged:

491

return PICK_MINOR

491

return PICK_MINOR

492

elif changes is not None and o~~the~~r[1] is None and dest in changes.salvaged:

492

elif changes is not None and minor[1] is None and dest in changes.salvaged:

493

return PICK_MAJOR

493

return PICK_MAJOR

494

elif changes is not None and dest in changes.merged:

494

elif changes is not None and dest in changes.merged:

495

return PICK_MAJOR

495

return PICK_MAJOR

496

elif not isancestor(~~new~~_tt, o~~the~~r_tt):

496

elif not isancestor(major_tt, minor_tt):

497

if ~~value~~[1] is not None:

497

if major[1] is not None:

498

return PICK_MAJOR

498

return PICK_MAJOR

499

elif isancestor(o~~the~~r_tt, ~~new~~_tt):

499

elif isancestor(minor_tt, major_tt):

500

return PICK_MAJOR

500

return PICK_MAJOR

501

return PICK_MINOR

501

return PICK_MINOR

502

503

504

def _revinfo_getter_extra(repo):

504

def _revinfo_getter_extra(repo):

505

"""return a function that return multiple data given a <rev>"i

505

"""return a function that return multiple data given a <rev>"i

506

507

* p1: revision number of first parent

507

* p1: revision number of first parent

508

* p2: revision number of first parent

508

* p2: revision number of first parent

509

* p1copies: mapping of copies from p1

509

* p1copies: mapping of copies from p1

510

* p2copies: mapping of copies from p2

510

* p2copies: mapping of copies from p2

511

* removed: a list of removed files

511

* removed: a list of removed files

512

* ismerged: a callback to know if file was merged in that revision

512

* ismerged: a callback to know if file was merged in that revision

513

"""

513

"""

514

cl = repo.changelog

514

cl = repo.changelog

515

parents = cl.parentrevs

515

parents = cl.parentrevs

516

517

def get_ismerged(rev):

517

def get_ismerged(rev):

518

ctx = repo[rev]

518

ctx = repo[rev]

519

520

def ismerged(path):

520

def ismerged(path):

521

if path not in ctx.files():

521

if path not in ctx.files():

522

return False

522

return False

523

fctx = ctx[path]

523

fctx = ctx[path]

524

parents = fctx._filelog.parents(fctx._filenode)

524

parents = fctx._filelog.parents(fctx._filenode)

525

nb_parents = 0

525

nb_parents = 0

526

for n in parents:

526

for n in parents:

527

if n != nullid:

527

if n != nullid:

528

nb_parents += 1

528

nb_parents += 1

529

return nb_parents >= 2

529

return nb_parents >= 2

530

531

return ismerged

531

return ismerged

532

533

def revinfo(rev):

533

def revinfo(rev):

534

p1, p2 = parents(rev)

534

p1, p2 = parents(rev)

535

ctx = repo[rev]

535

ctx = repo[rev]

536

p1copies, p2copies = ctx._copies

536

p1copies, p2copies = ctx._copies

537

removed = ctx.filesremoved()

537

removed = ctx.filesremoved()

538

return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

538

return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

539

540

return revinfo

540

return revinfo

541

542

543

def _combine_changeset_copies_extra(

543

def _combine_changeset_copies_extra(

544

revs, children, targetrev, revinfo, match, isancestor

544

revs, children, targetrev, revinfo, match, isancestor

545

):

545

):

546

"""version of `_combine_changeset_copies` that works with the Google

546

"""version of `_combine_changeset_copies` that works with the Google

547

specific "extra" based storage for copy information"""

547

specific "extra" based storage for copy information"""

548

all_copies = {}

548

all_copies = {}

549

alwaysmatch = match.always()

549

alwaysmatch = match.always()

550

for r in revs:

550

for r in revs:

551

copies = all_copies.pop(r, None)

551

copies = all_copies.pop(r, None)

552

if copies is None:

552

if copies is None:

553

# this is a root

553

# this is a root

554

copies = {}

554

copies = {}

555

for i, c in enumerate(children[r]):

555

for i, c in enumerate(children[r]):

556

p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)

556

p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)

557

if r == p1:

557

if r == p1:

558

parent = 1

558

parent = 1

559

childcopies = p1copies

559

childcopies = p1copies

560

else:

560

else:

561

assert r == p2

561

assert r == p2

562

parent = 2

562

parent = 2

563

childcopies = p2copies

563

childcopies = p2copies

564

if not alwaysmatch:

564

if not alwaysmatch:

565

childcopies = {

565

childcopies = {

566

dst: src for dst, src in childcopies.items() if match(dst)

566

dst: src for dst, src in childcopies.items() if match(dst)

567

}

567

}

568

newcopies = copies

568

newcopies = copies

569

if childcopies:

569

if childcopies:

570

newcopies = copies.copy()

570

newcopies = copies.copy()

571

for dest, source in pycompat.iteritems(childcopies):

571

for dest, source in pycompat.iteritems(childcopies):

572

prev = copies.get(source)

572

prev = copies.get(source)

573

if prev is not None and prev[1] is not None:

573

if prev is not None and prev[1] is not None:

574

source = prev[1]

574

source = prev[1]

575

newcopies[dest] = (c, source)

575

newcopies[dest] = (c, source)

576

assert newcopies is not copies

576

assert newcopies is not copies

577

for f in removed:

577

for f in removed:

578

if f in newcopies:

578

if f in newcopies:

579

if newcopies is copies:

579

if newcopies is copies:

580

# copy on write to avoid affecting potential other

580

# copy on write to avoid affecting potential other

581

# branches. when there are no other branches, this

581

# branches. when there are no other branches, this

582

# could be avoided.

582

# could be avoided.

583

newcopies = copies.copy()

583

newcopies = copies.copy()

584

newcopies[f] = (c, None)

584

newcopies[f] = (c, None)

585

othercopies = all_copies.get(c)

585

othercopies = all_copies.get(c)

586

if othercopies is None:

586

if othercopies is None:

587

all_copies[c] = newcopies

587

all_copies[c] = newcopies

588

else:

588

else:

589

# we are the second parent to work on c, we need to merge our

589

# we are the second parent to work on c, we need to merge our

590

# work with the other.

590

# work with the other.

591

#

591

#

592

# In case of conflict, parent 1 take precedence over parent 2.

592

# In case of conflict, parent 1 take precedence over parent 2.

593

# This is an arbitrary choice made anew when implementing

593

# This is an arbitrary choice made anew when implementing

594

# changeset based copies. It was made without regards with

594

# changeset based copies. It was made without regards with

595

# potential filelog related behavior.

595

# potential filelog related behavior.

596

if parent == 1:

596

if parent == 1:

597

_merge_copies_dict_extra(

597

_merge_copies_dict_extra(

598

othercopies, newcopies, isancestor, ismerged

598

othercopies, newcopies, isancestor, ismerged

599

)

599

)

600

else:

600

else:

601

_merge_copies_dict_extra(

601

_merge_copies_dict_extra(

602

newcopies, othercopies, isancestor, ismerged

602

newcopies, othercopies, isancestor, ismerged

603

)

603

)

604

all_copies[c] = newcopies

604

all_copies[c] = newcopies

605

606

final_copies = {}

606

final_copies = {}

607

for dest, (tt, source) in all_copies[targetrev].items():

607

for dest, (tt, source) in all_copies[targetrev].items():

608

if source is not None:

608

if source is not None:

609

final_copies[dest] = source

609

final_copies[dest] = source

610

return final_copies

610

return final_copies

611

612

613

def _merge_copies_dict_extra(minor, major, isancestor, ismerged):

613

def _merge_copies_dict_extra(minor, major, isancestor, ismerged):

614

"""version of `_merge_copies_dict` that works with the Google

614

"""version of `_merge_copies_dict` that works with the Google

615

specific "extra" based storage for copy information"""

615

specific "extra" based storage for copy information"""

616

for dest, value in major.items():

616

for dest, value in major.items():

617

other = minor.get(dest)

617

other = minor.get(dest)

618

if other is None:

618

if other is None:

619

minor[dest] = value

619

minor[dest] = value

620

else:

620

else:

621

new_tt = value[0]

621

new_tt = value[0]

622

other_tt = other[0]

622

other_tt = other[0]

623

if value[1] == other[1]:

623

if value[1] == other[1]:

624

continue

624

continue

625

# content from "major" wins, unless it is older

625

# content from "major" wins, unless it is older

626

# than the branch point or there is a merge

626

# than the branch point or there is a merge

627

if (

627

if (

628

new_tt == other_tt

628

new_tt == other_tt

629

or not isancestor(new_tt, other_tt)

629

or not isancestor(new_tt, other_tt)

630

or ismerged(dest)

630

or ismerged(dest)

631

):

631

):

632

minor[dest] = value

632

minor[dest] = value

633

634

635

def _forwardcopies(a, b, base=None, match=None):

635

def _forwardcopies(a, b, base=None, match=None):

636

"""find {dst@b: src@a} copy mapping where a is an ancestor of b"""

636

"""find {dst@b: src@a} copy mapping where a is an ancestor of b"""

637

638

if base is None:

638

if base is None:

639

base = a

639

base = a

640

match = a.repo().narrowmatch(match)

640

match = a.repo().narrowmatch(match)

641

# check for working copy

641

# check for working copy

642

if b.rev() is None:

642

if b.rev() is None:

643

cm = _committedforwardcopies(a, b.p1(), base, match)

643

cm = _committedforwardcopies(a, b.p1(), base, match)

644

# combine copies from dirstate if necessary

644

# combine copies from dirstate if necessary

645

copies = _chain(cm, _dirstatecopies(b._repo, match))

645

copies = _chain(cm, _dirstatecopies(b._repo, match))

646

else:

646

else:

647

copies = _committedforwardcopies(a, b, base, match)

647

copies = _committedforwardcopies(a, b, base, match)

648

return copies

648

return copies

649

650

651

def _backwardrenames(a, b, match):

651

def _backwardrenames(a, b, match):

652

if a._repo.ui.config(b'experimental', b'copytrace') == b'off':

652

if a._repo.ui.config(b'experimental', b'copytrace') == b'off':

653

return {}

653

return {}

654

655

# Even though we're not taking copies into account, 1:n rename situations

655

# Even though we're not taking copies into account, 1:n rename situations

656

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

656

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

657

# arbitrarily pick one of the renames.

657

# arbitrarily pick one of the renames.

658

# We don't want to pass in "match" here, since that would filter

658

# We don't want to pass in "match" here, since that would filter

659

# the destination by it. Since we're reversing the copies, we want

659

# the destination by it. Since we're reversing the copies, we want

660

# to filter the source instead.

660

# to filter the source instead.

661

f = _forwardcopies(b, a)

661

f = _forwardcopies(b, a)

662

r = {}

662

r = {}

663

for k, v in sorted(pycompat.iteritems(f)):

663

for k, v in sorted(pycompat.iteritems(f)):

664

if match and not match(v):

664

if match and not match(v):

665

continue

665

continue

666

# remove copies

666

# remove copies

667

if v in a:

667

if v in a:

668

continue

668

continue

669

r[v] = k

669

r[v] = k

670

return r

670

return r

671

672

673

def pathcopies(x, y, match=None):

673

def pathcopies(x, y, match=None):

674

"""find {dst@y: src@x} copy mapping for directed compare"""

674

"""find {dst@y: src@x} copy mapping for directed compare"""

675

repo = x._repo

675

repo = x._repo

676

debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

676

debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

677

if debug:

677

if debug:

678

repo.ui.debug(

678

repo.ui.debug(

679

b'debug.copies: searching copies from %s to %s\n' % (x, y)

679

b'debug.copies: searching copies from %s to %s\n' % (x, y)

680

)

680

)

681

if x == y or not x or not y:

681

if x == y or not x or not y:

682

return {}

682

return {}

683

if y.rev() is None and x == y.p1():

683

if y.rev() is None and x == y.p1():

684

if debug:

684

if debug:

685

repo.ui.debug(b'debug.copies: search mode: dirstate\n')

685

repo.ui.debug(b'debug.copies: search mode: dirstate\n')

686

# short-circuit to avoid issues with merge states

686

# short-circuit to avoid issues with merge states

687

return _dirstatecopies(repo, match)

687

return _dirstatecopies(repo, match)

688

a = y.ancestor(x)

688

a = y.ancestor(x)

689

if a == x:

689

if a == x:

690

if debug:

690

if debug:

691

repo.ui.debug(b'debug.copies: search mode: forward\n')

691

repo.ui.debug(b'debug.copies: search mode: forward\n')

692

copies = _forwardcopies(x, y, match=match)

692

copies = _forwardcopies(x, y, match=match)

693

elif a == y:

693

elif a == y:

694

if debug:

694

if debug:

695

repo.ui.debug(b'debug.copies: search mode: backward\n')

695

repo.ui.debug(b'debug.copies: search mode: backward\n')

696

copies = _backwardrenames(x, y, match=match)

696

copies = _backwardrenames(x, y, match=match)

697

else:

697

else:

698

if debug:

698

if debug:

699

repo.ui.debug(b'debug.copies: search mode: combined\n')

699

repo.ui.debug(b'debug.copies: search mode: combined\n')

700

base = None

700

base = None

701

if a.rev() != nullrev:

701

if a.rev() != nullrev:

702

base = x

702

base = x

703

copies = _chain(

703

copies = _chain(

704

_backwardrenames(x, a, match=match),

704

_backwardrenames(x, a, match=match),

705

_forwardcopies(a, y, base, match=match),

705

_forwardcopies(a, y, base, match=match),

706

)

706

)

707

_filter(x, y, copies)

707

_filter(x, y, copies)

708

return copies

708

return copies

709

710

711

def mergecopies(repo, c1, c2, base):

711

def mergecopies(repo, c1, c2, base):

712

"""

712

"""

713

Finds moves and copies between context c1 and c2 that are relevant for

713

Finds moves and copies between context c1 and c2 that are relevant for

714

merging. 'base' will be used as the merge base.

714

merging. 'base' will be used as the merge base.

715

716

Copytracing is used in commands like rebase, merge, unshelve, etc to merge

716

Copytracing is used in commands like rebase, merge, unshelve, etc to merge

717

files that were moved/ copied in one merge parent and modified in another.

717

files that were moved/ copied in one merge parent and modified in another.

718

For example:

718

For example:

719

720

o ---> 4 another commit

720

o ---> 4 another commit

721

|

721

|

722

| o ---> 3 commit that modifies a.txt

722

| o ---> 3 commit that modifies a.txt

723

| /

723

| /

724

o / ---> 2 commit that moves a.txt to b.txt

724

o / ---> 2 commit that moves a.txt to b.txt

725

|/

725

|/

726

o ---> 1 merge base

726

o ---> 1 merge base

727

728

If we try to rebase revision 3 on revision 4, since there is no a.txt in

728

If we try to rebase revision 3 on revision 4, since there is no a.txt in

729

revision 4, and if user have copytrace disabled, we prints the following

729

revision 4, and if user have copytrace disabled, we prints the following

730

message:

730

message:

731

732

```other changed <file> which local deleted```

732

```other changed <file> which local deleted```

733

734

Returns a tuple where:

734

Returns a tuple where:

735

736

"branch_copies" an instance of branch_copies.

736

"branch_copies" an instance of branch_copies.

737

738

"diverge" is a mapping of source name -> list of destination names

738

"diverge" is a mapping of source name -> list of destination names

739

for divergent renames.

739

for divergent renames.

740

741

This function calls different copytracing algorithms based on config.

741

This function calls different copytracing algorithms based on config.

742

"""

742

"""

743

# avoid silly behavior for update from empty dir

743

# avoid silly behavior for update from empty dir

744

if not c1 or not c2 or c1 == c2:

744

if not c1 or not c2 or c1 == c2:

745

return branch_copies(), branch_copies(), {}

745

return branch_copies(), branch_copies(), {}

746

747

narrowmatch = c1.repo().narrowmatch()

747

narrowmatch = c1.repo().narrowmatch()

748

749

# avoid silly behavior for parent -> working dir

749

# avoid silly behavior for parent -> working dir

750

if c2.node() is None and c1.node() == repo.dirstate.p1():

750

if c2.node() is None and c1.node() == repo.dirstate.p1():

751

return (

751

return (

752

branch_copies(_dirstatecopies(repo, narrowmatch)),

752

branch_copies(_dirstatecopies(repo, narrowmatch)),

753

branch_copies(),

753

branch_copies(),

754

{},

754

{},

755

)

755

)

756

757

copytracing = repo.ui.config(b'experimental', b'copytrace')

757

copytracing = repo.ui.config(b'experimental', b'copytrace')

758

if stringutil.parsebool(copytracing) is False:

758

if stringutil.parsebool(copytracing) is False:

759

# stringutil.parsebool() returns None when it is unable to parse the

759

# stringutil.parsebool() returns None when it is unable to parse the

760

# value, so we should rely on making sure copytracing is on such cases

760

# value, so we should rely on making sure copytracing is on such cases

761

return branch_copies(), branch_copies(), {}

761

return branch_copies(), branch_copies(), {}

762

763

if usechangesetcentricalgo(repo):

763

if usechangesetcentricalgo(repo):

764

# The heuristics don't make sense when we need changeset-centric algos

764

# The heuristics don't make sense when we need changeset-centric algos

765

return _fullcopytracing(repo, c1, c2, base)

765

return _fullcopytracing(repo, c1, c2, base)

766

767

# Copy trace disabling is explicitly below the node == p1 logic above

767

# Copy trace disabling is explicitly below the node == p1 logic above

768

# because the logic above is required for a simple copy to be kept across a

768

# because the logic above is required for a simple copy to be kept across a

769

# rebase.

769

# rebase.

770

if copytracing == b'heuristics':

770

if copytracing == b'heuristics':

771

# Do full copytracing if only non-public revisions are involved as

771

# Do full copytracing if only non-public revisions are involved as

772

# that will be fast enough and will also cover the copies which could

772

# that will be fast enough and will also cover the copies which could

773

# be missed by heuristics

773

# be missed by heuristics

774

if _isfullcopytraceable(repo, c1, base):

774

if _isfullcopytraceable(repo, c1, base):

775

return _fullcopytracing(repo, c1, c2, base)

775

return _fullcopytracing(repo, c1, c2, base)

776

return _heuristicscopytracing(repo, c1, c2, base)

776

return _heuristicscopytracing(repo, c1, c2, base)

777

else:

777

else:

778

return _fullcopytracing(repo, c1, c2, base)

778

return _fullcopytracing(repo, c1, c2, base)

779

780

781

def _isfullcopytraceable(repo, c1, base):

781

def _isfullcopytraceable(repo, c1, base):

782

"""Checks that if base, source and destination are all no-public branches,

782

"""Checks that if base, source and destination are all no-public branches,

783

if yes let's use the full copytrace algorithm for increased capabilities

783

if yes let's use the full copytrace algorithm for increased capabilities

784

since it will be fast enough.

784

since it will be fast enough.

785

786

`experimental.copytrace.sourcecommitlimit` can be used to set a limit for

786

`experimental.copytrace.sourcecommitlimit` can be used to set a limit for

787

number of changesets from c1 to base such that if number of changesets are

787

number of changesets from c1 to base such that if number of changesets are

788

more than the limit, full copytracing algorithm won't be used.

788

more than the limit, full copytracing algorithm won't be used.

789

"""

789

"""

790

if c1.rev() is None:

790

if c1.rev() is None:

791

c1 = c1.p1()

791

c1 = c1.p1()

792

if c1.mutable() and base.mutable():

792

if c1.mutable() and base.mutable():

793

sourcecommitlimit = repo.ui.configint(

793

sourcecommitlimit = repo.ui.configint(

794

b'experimental', b'copytrace.sourcecommitlimit'

794

b'experimental', b'copytrace.sourcecommitlimit'

795

)

795

)

796

commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))

796

commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))

797

return commits < sourcecommitlimit

797

return commits < sourcecommitlimit

798

return False

798

return False

799

800

801

def _checksinglesidecopies(

801

def _checksinglesidecopies(

802

src, dsts1, m1, m2, mb, c2, base, copy, renamedelete

802

src, dsts1, m1, m2, mb, c2, base, copy, renamedelete

803

):

803

):

804

if src not in m2:

804

if src not in m2:

805

# deleted on side 2

805

# deleted on side 2

806

if src not in m1:

806

if src not in m1:

807

# renamed on side 1, deleted on side 2

807

# renamed on side 1, deleted on side 2

808

renamedelete[src] = dsts1

808

renamedelete[src] = dsts1

809

elif src not in mb:

809

elif src not in mb:

810

# Work around the "short-circuit to avoid issues with merge states"

810

# Work around the "short-circuit to avoid issues with merge states"

811

# thing in pathcopies(): pathcopies(x, y) can return a copy where the

811

# thing in pathcopies(): pathcopies(x, y) can return a copy where the

812

# destination doesn't exist in y.

812

# destination doesn't exist in y.

813

pass

813

pass

814

elif mb[src] != m2[src] and not _related(c2[src], base[src]):

814

elif mb[src] != m2[src] and not _related(c2[src], base[src]):

815

return

815

return

816

elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):

816

elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):

817

# modified on side 2

817

# modified on side 2

818

for dst in dsts1:

818

for dst in dsts1:

819

copy[dst] = src

819

copy[dst] = src

820

821

822

class branch_copies(object):

822

class branch_copies(object):

823

"""Information about copies made on one side of a merge/graft.

823

"""Information about copies made on one side of a merge/graft.

824

825

"copy" is a mapping from destination name -> source name,

825

"copy" is a mapping from destination name -> source name,

826

where source is in c1 and destination is in c2 or vice-versa.

826

where source is in c1 and destination is in c2 or vice-versa.

827

828

"movewithdir" is a mapping from source name -> destination name,

828

"movewithdir" is a mapping from source name -> destination name,

829

where the file at source present in one context but not the other

829

where the file at source present in one context but not the other

830

needs to be moved to destination by the merge process, because the

830

needs to be moved to destination by the merge process, because the

831

other context moved the directory it is in.

831

other context moved the directory it is in.

832

833

"renamedelete" is a mapping of source name -> list of destination

833

"renamedelete" is a mapping of source name -> list of destination

834

names for files deleted in c1 that were renamed in c2 or vice-versa.

834

names for files deleted in c1 that were renamed in c2 or vice-versa.

835

836

"dirmove" is a mapping of detected source dir -> destination dir renames.

836

"dirmove" is a mapping of detected source dir -> destination dir renames.

837

This is needed for handling changes to new files previously grafted into

837

This is needed for handling changes to new files previously grafted into

838

renamed directories.

838

renamed directories.

839

"""

839

"""

840

841

def __init__(

841

def __init__(

842

self, copy=None, renamedelete=None, dirmove=None, movewithdir=None

842

self, copy=None, renamedelete=None, dirmove=None, movewithdir=None

843

):

843

):

844

self.copy = {} if copy is None else copy

844

self.copy = {} if copy is None else copy

845

self.renamedelete = {} if renamedelete is None else renamedelete

845

self.renamedelete = {} if renamedelete is None else renamedelete

846

self.dirmove = {} if dirmove is None else dirmove

846

self.dirmove = {} if dirmove is None else dirmove

847

self.movewithdir = {} if movewithdir is None else movewithdir

847

self.movewithdir = {} if movewithdir is None else movewithdir

848

849

def __repr__(self):

849

def __repr__(self):

850

return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (

850

return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (

851

self.copy,

851

self.copy,

852

self.renamedelete,

852

self.renamedelete,

853

self.dirmove,

853

self.dirmove,

854

self.movewithdir,

854

self.movewithdir,

855

)

855

)

856

857

858

def _fullcopytracing(repo, c1, c2, base):

858

def _fullcopytracing(repo, c1, c2, base):

859

"""The full copytracing algorithm which finds all the new files that were

859

"""The full copytracing algorithm which finds all the new files that were

860

added from merge base up to the top commit and for each file it checks if

860

added from merge base up to the top commit and for each file it checks if

861

this file was copied from another file.

861

this file was copied from another file.

862

863

This is pretty slow when a lot of changesets are involved but will track all

863

This is pretty slow when a lot of changesets are involved but will track all

864

the copies.

864

the copies.

865

"""

865

"""

866

m1 = c1.manifest()

866

m1 = c1.manifest()

867

m2 = c2.manifest()

867

m2 = c2.manifest()

868

mb = base.manifest()

868

mb = base.manifest()

869

870

copies1 = pathcopies(base, c1)

870

copies1 = pathcopies(base, c1)

871

copies2 = pathcopies(base, c2)

871

copies2 = pathcopies(base, c2)

872

873

if not (copies1 or copies2):

873

if not (copies1 or copies2):

874

return branch_copies(), branch_copies(), {}

874

return branch_copies(), branch_copies(), {}

875

876

inversecopies1 = {}

876

inversecopies1 = {}

877

inversecopies2 = {}

877

inversecopies2 = {}

878

for dst, src in copies1.items():

878

for dst, src in copies1.items():

879

inversecopies1.setdefault(src, []).append(dst)

879

inversecopies1.setdefault(src, []).append(dst)

880

for dst, src in copies2.items():

880

for dst, src in copies2.items():

881

inversecopies2.setdefault(src, []).append(dst)

881

inversecopies2.setdefault(src, []).append(dst)

882

883

copy1 = {}

883

copy1 = {}

884

copy2 = {}

884

copy2 = {}

885

diverge = {}

885

diverge = {}

886

renamedelete1 = {}

886

renamedelete1 = {}

887

renamedelete2 = {}

887

renamedelete2 = {}

888

allsources = set(inversecopies1) | set(inversecopies2)

888

allsources = set(inversecopies1) | set(inversecopies2)

889

for src in allsources:

889

for src in allsources:

890

dsts1 = inversecopies1.get(src)

890

dsts1 = inversecopies1.get(src)

891

dsts2 = inversecopies2.get(src)

891

dsts2 = inversecopies2.get(src)

892

if dsts1 and dsts2:

892

if dsts1 and dsts2:

893

# copied/renamed on both sides

893

# copied/renamed on both sides

894

if src not in m1 and src not in m2:

894

if src not in m1 and src not in m2:

895

# renamed on both sides

895

# renamed on both sides

896

dsts1 = set(dsts1)

896

dsts1 = set(dsts1)

897

dsts2 = set(dsts2)

897

dsts2 = set(dsts2)

898

# If there's some overlap in the rename destinations, we

898

# If there's some overlap in the rename destinations, we

899

# consider it not divergent. For example, if side 1 copies 'a'

899

# consider it not divergent. For example, if side 1 copies 'a'

900

# to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'

900

# to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'

901

# and 'd' and deletes 'a'.

901

# and 'd' and deletes 'a'.

902

if dsts1 & dsts2:

902

if dsts1 & dsts2:

903

for dst in dsts1 & dsts2:

903

for dst in dsts1 & dsts2:

904

copy1[dst] = src

904

copy1[dst] = src

905

copy2[dst] = src

905

copy2[dst] = src

906

else:

906

else:

907

diverge[src] = sorted(dsts1 | dsts2)

907

diverge[src] = sorted(dsts1 | dsts2)

908

elif src in m1 and src in m2:

908

elif src in m1 and src in m2:

909

# copied on both sides

909

# copied on both sides

910

dsts1 = set(dsts1)

910

dsts1 = set(dsts1)

911

dsts2 = set(dsts2)

911

dsts2 = set(dsts2)

912

for dst in dsts1 & dsts2:

912

for dst in dsts1 & dsts2:

913

copy1[dst] = src

913

copy1[dst] = src

914

copy2[dst] = src

914

copy2[dst] = src

915

# TODO: Handle cases where it was renamed on one side and copied

915

# TODO: Handle cases where it was renamed on one side and copied

916

# on the other side

916

# on the other side

917

elif dsts1:

917

elif dsts1:

918

# copied/renamed only on side 1

918

# copied/renamed only on side 1

919

_checksinglesidecopies(

919

_checksinglesidecopies(

920

src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1

920

src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1

921

)

921

)

922

elif dsts2:

922

elif dsts2:

923

# copied/renamed only on side 2

923

# copied/renamed only on side 2

924

_checksinglesidecopies(

924

_checksinglesidecopies(

925

src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2

925

src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2

926

)

926

)

927

928

# find interesting file sets from manifests

928

# find interesting file sets from manifests

929

cache = []

929

cache = []

930

931

def _get_addedfiles(idx):

931

def _get_addedfiles(idx):

932

if not cache:

932

if not cache:

933

addedinm1 = m1.filesnotin(mb, repo.narrowmatch())

933

addedinm1 = m1.filesnotin(mb, repo.narrowmatch())

934

addedinm2 = m2.filesnotin(mb, repo.narrowmatch())

934

addedinm2 = m2.filesnotin(mb, repo.narrowmatch())

935

u1 = sorted(addedinm1 - addedinm2)

935

u1 = sorted(addedinm1 - addedinm2)

936

u2 = sorted(addedinm2 - addedinm1)

936

u2 = sorted(addedinm2 - addedinm1)

937

cache.extend((u1, u2))

937

cache.extend((u1, u2))

938

return cache[idx]

938

return cache[idx]

939

940

u1fn = lambda: _get_addedfiles(0)

940

u1fn = lambda: _get_addedfiles(0)

941

u2fn = lambda: _get_addedfiles(1)

941

u2fn = lambda: _get_addedfiles(1)

942

if repo.ui.debugflag:

942

if repo.ui.debugflag:

943

u1 = u1fn()

943

u1 = u1fn()

944

u2 = u2fn()

944

u2 = u2fn()

945

946

header = b" unmatched files in %s"

946

header = b" unmatched files in %s"

947

if u1:

947

if u1:

948

repo.ui.debug(

948

repo.ui.debug(

949

b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))

949

b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))

950

)

950

)

951

if u2:

951

if u2:

952

repo.ui.debug(

952

repo.ui.debug(

953

b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))

953

b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))

954

)

954

)

955

956

renamedeleteset = set()

956

renamedeleteset = set()

957

divergeset = set()

957

divergeset = set()

958

for dsts in diverge.values():

958

for dsts in diverge.values():

959

divergeset.update(dsts)

959

divergeset.update(dsts)

960

for dsts in renamedelete1.values():

960

for dsts in renamedelete1.values():

961

renamedeleteset.update(dsts)

961

renamedeleteset.update(dsts)

962

for dsts in renamedelete2.values():

962

for dsts in renamedelete2.values():

963

renamedeleteset.update(dsts)

963

renamedeleteset.update(dsts)

964

965

repo.ui.debug(

965

repo.ui.debug(

966

b" all copies found (* = to merge, ! = divergent, "

966

b" all copies found (* = to merge, ! = divergent, "

967

b"% = renamed and deleted):\n"

967

b"% = renamed and deleted):\n"

968

)

968

)

969

for side, copies in ((b"local", copies1), (b"remote", copies2)):

969

for side, copies in ((b"local", copies1), (b"remote", copies2)):

970

if not copies:

970

if not copies:

971

continue

971

continue

972

repo.ui.debug(b" on %s side:\n" % side)

972

repo.ui.debug(b" on %s side:\n" % side)

973

for f in sorted(copies):

973

for f in sorted(copies):

974

note = b""

974

note = b""

975

if f in copy1 or f in copy2:

975

if f in copy1 or f in copy2:

976

note += b"*"

976

note += b"*"

977

if f in divergeset:

977

if f in divergeset:

978

note += b"!"

978

note += b"!"

979

if f in renamedeleteset:

979

if f in renamedeleteset:

980

note += b"%"

980

note += b"%"

981

repo.ui.debug(

981

repo.ui.debug(

982

b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)

982

b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)

983

)

983

)

984

del renamedeleteset

984

del renamedeleteset

985

del divergeset

985

del divergeset

986

987

repo.ui.debug(b" checking for directory renames\n")

987

repo.ui.debug(b" checking for directory renames\n")

988

989

dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)

989

dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)

990

dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

990

dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

991

992

branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)

992

branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)

993

branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

993

branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

994

995

return branch_copies1, branch_copies2, diverge

995

return branch_copies1, branch_copies2, diverge

996

997

998

def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):

998

def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):

999

"""Finds moved directories and files that should move with them.

999

"""Finds moved directories and files that should move with them.

1000

1001

ctx: the context for one of the sides

1001

ctx: the context for one of the sides

1002

copy: files copied on the same side (as ctx)

1002

copy: files copied on the same side (as ctx)

1003

fullcopy: files copied on the same side (as ctx), including those that

1003

fullcopy: files copied on the same side (as ctx), including those that

1004

merge.manifestmerge() won't care about

1004

merge.manifestmerge() won't care about

1005

addedfilesfn: function returning added files on the other side (compared to

1005

addedfilesfn: function returning added files on the other side (compared to

1006

ctx)

1006

ctx)

1007

"""

1007

"""

1008

# generate a directory move map

1008

# generate a directory move map

1009

invalid = set()

1009

invalid = set()

1010

dirmove = {}

1010

dirmove = {}

1011

1012

# examine each file copy for a potential directory move, which is

1012

# examine each file copy for a potential directory move, which is

1013

# when all the files in a directory are moved to a new directory

1013

# when all the files in a directory are moved to a new directory

1014

for dst, src in pycompat.iteritems(fullcopy):

1014

for dst, src in pycompat.iteritems(fullcopy):

1015

dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)

1015

dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)

1016

if dsrc in invalid:

1016

if dsrc in invalid:

1017

# already seen to be uninteresting

1017

# already seen to be uninteresting

1018

continue

1018

continue

1019

elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):

1019

elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):

1020

# directory wasn't entirely moved locally

1020

# directory wasn't entirely moved locally

1021

invalid.add(dsrc)

1021

invalid.add(dsrc)

1022

elif dsrc in dirmove and dirmove[dsrc] != ddst:

1022

elif dsrc in dirmove and dirmove[dsrc] != ddst:

1023

# files from the same directory moved to two different places

1023

# files from the same directory moved to two different places

1024

invalid.add(dsrc)

1024

invalid.add(dsrc)

1025

else:

1025

else:

1026

# looks good so far

1026

# looks good so far

1027

dirmove[dsrc] = ddst

1027

dirmove[dsrc] = ddst

1028

1029

for i in invalid:

1029

for i in invalid:

1030

if i in dirmove:

1030

if i in dirmove:

1031

del dirmove[i]

1031

del dirmove[i]

1032

del invalid

1032

del invalid

1033

1034

if not dirmove:

1034

if not dirmove:

1035

return {}, {}

1035

return {}, {}

1036

1037

dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

1037

dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

1038

1039

for d in dirmove:

1039

for d in dirmove:

1040

repo.ui.debug(

1040

repo.ui.debug(

1041

b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])

1041

b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])

1042

)

1042

)

1043

1044

movewithdir = {}

1044

movewithdir = {}

1045

# check unaccounted nonoverlapping files against directory moves

1045

# check unaccounted nonoverlapping files against directory moves

1046

for f in addedfilesfn():

1046

for f in addedfilesfn():

1047

if f not in fullcopy:

1047

if f not in fullcopy:

1048

for d in dirmove:

1048

for d in dirmove:

1049

if f.startswith(d):

1049

if f.startswith(d):

1050

# new file added in a directory that was moved, move it

1050

# new file added in a directory that was moved, move it

1051

df = dirmove[d] + f[len(d) :]

1051

df = dirmove[d] + f[len(d) :]

1052

if df not in copy:

1052

if df not in copy:

1053

movewithdir[f] = df

1053

movewithdir[f] = df

1054

repo.ui.debug(

1054

repo.ui.debug(

1055

b" pending file src: '%s' -> dst: '%s'\n"

1055

b" pending file src: '%s' -> dst: '%s'\n"

1056

% (f, df)

1056

% (f, df)

1057

)

1057

)

1058

break

1058

break

1059

1060

return dirmove, movewithdir

1060

return dirmove, movewithdir

1061

1062

1063

def _heuristicscopytracing(repo, c1, c2, base):

1063

def _heuristicscopytracing(repo, c1, c2, base):

1064

"""Fast copytracing using filename heuristics

1064

"""Fast copytracing using filename heuristics

1065

1066

Assumes that moves or renames are of following two types:

1066

Assumes that moves or renames are of following two types:

1067

1068

1) Inside a directory only (same directory name but different filenames)

1068

1) Inside a directory only (same directory name but different filenames)

1069

2) Move from one directory to another

1069

2) Move from one directory to another

1070

(same filenames but different directory names)

1070

(same filenames but different directory names)

1071

1072

Works only when there are no merge commits in the "source branch".

1072

Works only when there are no merge commits in the "source branch".

1073

Source branch is commits from base up to c2 not including base.

1073

Source branch is commits from base up to c2 not including base.

1074

1075

If merge is involved it fallbacks to _fullcopytracing().

1075

If merge is involved it fallbacks to _fullcopytracing().

1076

1077

Can be used by setting the following config:

1077

Can be used by setting the following config:

1078

1079

[experimental]

1079

[experimental]

1080

copytrace = heuristics

1080

copytrace = heuristics

1081

1082

In some cases the copy/move candidates found by heuristics can be very large

1082

In some cases the copy/move candidates found by heuristics can be very large

1083

in number and that will make the algorithm slow. The number of possible

1083

in number and that will make the algorithm slow. The number of possible

1084

candidates to check can be limited by using the config

1084

candidates to check can be limited by using the config

1085

`experimental.copytrace.movecandidateslimit` which defaults to 100.

1085

`experimental.copytrace.movecandidateslimit` which defaults to 100.

1086

"""

1086

"""

1087

1088

if c1.rev() is None:

1088

if c1.rev() is None:

1089

c1 = c1.p1()

1089

c1 = c1.p1()

1090

if c2.rev() is None:

1090

if c2.rev() is None:

1091

c2 = c2.p1()

1091

c2 = c2.p1()

1092

1093

changedfiles = set()

1093

changedfiles = set()

1094

m1 = c1.manifest()

1094

m1 = c1.manifest()

1095

if not repo.revs(b'%d::%d', base.rev(), c2.rev()):

1095

if not repo.revs(b'%d::%d', base.rev(), c2.rev()):

1096

# If base is not in c2 branch, we switch to fullcopytracing

1096

# If base is not in c2 branch, we switch to fullcopytracing

1097

repo.ui.debug(

1097

repo.ui.debug(

1098

b"switching to full copytracing as base is not "

1098

b"switching to full copytracing as base is not "

1099

b"an ancestor of c2\n"

1099

b"an ancestor of c2\n"

1100

)

1100

)

1101

return _fullcopytracing(repo, c1, c2, base)

1101

return _fullcopytracing(repo, c1, c2, base)

1102

1103

ctx = c2

1103

ctx = c2

1104

while ctx != base:

1104

while ctx != base:

1105

if len(ctx.parents()) == 2:

1105

if len(ctx.parents()) == 2:

1106

# To keep things simple let's not handle merges

1106

# To keep things simple let's not handle merges

1107

repo.ui.debug(b"switching to full copytracing because of merges\n")

1107

repo.ui.debug(b"switching to full copytracing because of merges\n")

1108

return _fullcopytracing(repo, c1, c2, base)

1108

return _fullcopytracing(repo, c1, c2, base)

1109

changedfiles.update(ctx.files())

1109

changedfiles.update(ctx.files())

1110

ctx = ctx.p1()

1110

ctx = ctx.p1()

1111

1112

copies2 = {}

1112

copies2 = {}

1113

cp = _forwardcopies(base, c2)

1113

cp = _forwardcopies(base, c2)

1114

for dst, src in pycompat.iteritems(cp):

1114

for dst, src in pycompat.iteritems(cp):

1115

if src in m1:

1115

if src in m1:

1116

copies2[dst] = src

1116

copies2[dst] = src

1117

1118

# file is missing if it isn't present in the destination, but is present in

1118

# file is missing if it isn't present in the destination, but is present in

1119

# the base and present in the source.

1119

# the base and present in the source.

1120

# Presence in the base is important to exclude added files, presence in the

1120

# Presence in the base is important to exclude added files, presence in the

1121

# source is important to exclude removed files.

1121

# source is important to exclude removed files.

1122

filt = lambda f: f not in m1 and f in base and f in c2

1122

filt = lambda f: f not in m1 and f in base and f in c2

1123

missingfiles = [f for f in changedfiles if filt(f)]

1123

missingfiles = [f for f in changedfiles if filt(f)]

1124

1125

copies1 = {}

1125

copies1 = {}

1126

if missingfiles:

1126

if missingfiles:

1127

basenametofilename = collections.defaultdict(list)

1127

basenametofilename = collections.defaultdict(list)

1128

dirnametofilename = collections.defaultdict(list)

1128

dirnametofilename = collections.defaultdict(list)

1129

1130

for f in m1.filesnotin(base.manifest()):

1130

for f in m1.filesnotin(base.manifest()):

1131

basename = os.path.basename(f)

1131

basename = os.path.basename(f)

1132

dirname = os.path.dirname(f)

1132

dirname = os.path.dirname(f)

1133

basenametofilename[basename].append(f)

1133

basenametofilename[basename].append(f)

1134

dirnametofilename[dirname].append(f)

1134

dirnametofilename[dirname].append(f)

1135

1136

for f in missingfiles:

1136

for f in missingfiles:

1137

basename = os.path.basename(f)

1137

basename = os.path.basename(f)

1138

dirname = os.path.dirname(f)

1138

dirname = os.path.dirname(f)

1139

samebasename = basenametofilename[basename]

1139

samebasename = basenametofilename[basename]

1140

samedirname = dirnametofilename[dirname]

1140

samedirname = dirnametofilename[dirname]

1141

movecandidates = samebasename + samedirname

1141

movecandidates = samebasename + samedirname

1142

# f is guaranteed to be present in c2, that's why

1142

# f is guaranteed to be present in c2, that's why

1143

# c2.filectx(f) won't fail

1143

# c2.filectx(f) won't fail

1144

f2 = c2.filectx(f)

1144

f2 = c2.filectx(f)

1145

# we can have a lot of candidates which can slow down the heuristics

1145

# we can have a lot of candidates which can slow down the heuristics

1146

# config value to limit the number of candidates moves to check

1146

# config value to limit the number of candidates moves to check

1147

maxcandidates = repo.ui.configint(

1147

maxcandidates = repo.ui.configint(

1148

b'experimental', b'copytrace.movecandidateslimit'

1148

b'experimental', b'copytrace.movecandidateslimit'

1149

)

1149

)

1150

1151

if len(movecandidates) > maxcandidates:

1151

if len(movecandidates) > maxcandidates:

1152

repo.ui.status(

1152

repo.ui.status(

1153

_(

1153

_(

1154

b"skipping copytracing for '%s', more "

1154

b"skipping copytracing for '%s', more "

1155

b"candidates than the limit: %d\n"

1155

b"candidates than the limit: %d\n"

1156

)

1156

)

1157

% (f, len(movecandidates))

1157

% (f, len(movecandidates))

1158

)

1158

)

1159

continue

1159

continue

1160

1161

for candidate in movecandidates:

1161

for candidate in movecandidates:

1162

f1 = c1.filectx(candidate)

1162

f1 = c1.filectx(candidate)

1163

if _related(f1, f2):

1163

if _related(f1, f2):

1164

# if there are a few related copies then we'll merge

1164

# if there are a few related copies then we'll merge

1165

# changes into all of them. This matches the behaviour

1165

# changes into all of them. This matches the behaviour

1166

# of upstream copytracing

1166

# of upstream copytracing

1167

copies1[candidate] = f

1167

copies1[candidate] = f

1168

1169

return branch_copies(copies1), branch_copies(copies2), {}

1169

return branch_copies(copies1), branch_copies(copies2), {}

1170

1171

1172

def _related(f1, f2):

1172

def _related(f1, f2):

1173

"""return True if f1 and f2 filectx have a common ancestor

1173

"""return True if f1 and f2 filectx have a common ancestor

1174

1175

Walk back to common ancestor to see if the two files originate

1175

Walk back to common ancestor to see if the two files originate

1176

from the same file. Since workingfilectx's rev() is None it messes

1176

from the same file. Since workingfilectx's rev() is None it messes

1177

up the integer comparison logic, hence the pre-step check for

1177

up the integer comparison logic, hence the pre-step check for

1178

None (f1 and f2 can only be workingfilectx's initially).

1178

None (f1 and f2 can only be workingfilectx's initially).

1179

"""

1179

"""

1180

1181

if f1 == f2:

1181

if f1 == f2:

1182

return True # a match

1182

return True # a match

1183

1184

g1, g2 = f1.ancestors(), f2.ancestors()

1184

g1, g2 = f1.ancestors(), f2.ancestors()

1185

try:

1185

try:

1186

f1r, f2r = f1.linkrev(), f2.linkrev()

1186

f1r, f2r = f1.linkrev(), f2.linkrev()

1187

1188

if f1r is None:

1188

if f1r is None:

1189

f1 = next(g1)

1189

f1 = next(g1)

1190

if f2r is None:

1190

if f2r is None:

1191

f2 = next(g2)

1191

f2 = next(g2)

1192

1193

while True:

1193

while True:

1194

f1r, f2r = f1.linkrev(), f2.linkrev()

1194

f1r, f2r = f1.linkrev(), f2.linkrev()

1195

if f1r > f2r:

1195

if f1r > f2r:

1196

f1 = next(g1)

1196

f1 = next(g1)

1197

elif f2r > f1r:

1197

elif f2r > f1r:

1198

f2 = next(g2)

1198

f2 = next(g2)

1199

else: # f1 and f2 point to files in the same linkrev

1199

else: # f1 and f2 point to files in the same linkrev

1200

return f1 == f2 # true if they point to the same file

1200

return f1 == f2 # true if they point to the same file

1201

except StopIteration:

1201

except StopIteration:

1202

return False

1202

return False

1203

1204

1205

def graftcopies(wctx, ctx, base):

1205

def graftcopies(wctx, ctx, base):

1206

"""reproduce copies between base and ctx in the wctx

1206

"""reproduce copies between base and ctx in the wctx

1207

1208

Unlike mergecopies(), this function will only consider copies between base

1208

Unlike mergecopies(), this function will only consider copies between base

1209

and ctx; it will ignore copies between base and wctx. Also unlike

1209

and ctx; it will ignore copies between base and wctx. Also unlike

1210

mergecopies(), this function will apply copies to the working copy (instead

1210

mergecopies(), this function will apply copies to the working copy (instead

1211

of just returning information about the copies). That makes it cheaper

1211

of just returning information about the copies). That makes it cheaper

1212

(especially in the common case of base==ctx.p1()) and useful also when

1212

(especially in the common case of base==ctx.p1()) and useful also when

1213

experimental.copytrace=off.

1213

experimental.copytrace=off.

1214

1215

merge.update() will have already marked most copies, but it will only

1215

merge.update() will have already marked most copies, but it will only

1216

mark copies if it thinks the source files are related (see

1216

mark copies if it thinks the source files are related (see

1217

merge._related()). It will also not mark copies if the file wasn't modified

1217

merge._related()). It will also not mark copies if the file wasn't modified

1218

on the local side. This function adds the copies that were "missed"

1218

on the local side. This function adds the copies that were "missed"

1219

by merge.update().

1219

by merge.update().

1220

"""

1220

"""

1221

new_copies = pathcopies(base, ctx)

1221

new_copies = pathcopies(base, ctx)

1222

_filter(wctx.p1(), wctx, new_copies)

1222

_filter(wctx.p1(), wctx, new_copies)

1223

for dst, src in pycompat.iteritems(new_copies):

1223

for dst, src in pycompat.iteritems(new_copies):

1224

wctx[dst].markcopied(src)

1224

wctx[dst].markcopied(src)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # coding: utf8
             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import collections
             import os
             from .i18n import _
             from .node import (
                 nullid,
                 nullrev,
             )
             from . import (
                 match as matchmod,
                 pathutil,
                 policy,
                 pycompat,
                 util,
             )
             from .utils import stringutil
             from .revlogutils import (
                 flagutil,
                 sidedata as sidedatamod,
             )
             rustmod = policy.importrust("copy_tracing")
             def _filter(src, dst, t):
                 """filters out invalid copies after chaining"""
                 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
                 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
                 # in the following table (not including trivial cases). For example, case 2
                 # is where a file existed in 'src' and remained under that name in 'mid' and
                 # then was renamed between 'mid' and 'dst'.
                 #
                 # case src mid dst result
                 #   1   x   y   -    -
                 #   2   x   y   y   x->y
                 #   3   x   y   x    -
                 #   4   x   y   z   x->z
                 #   5   -   x   y    -
                 #   6   x   x   y   x->y
                 #
                 # _chain() takes care of chaining the copies in 'a' and 'b', but it
                 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
                 # between 5 and 6, so it includes all cases in its result.
                 # Cases 1, 3, and 5 are then removed by _filter().
                 for k, v in list(t.items()):
                     # remove copies from files that didn't exist
                     if v not in src:
                         del t[k]
                     # remove criss-crossed copies
                     elif k in src and v in dst:
                         del t[k]
                     # remove copies to files that were then removed
                     elif k not in dst:
                         del t[k]
             def _chain(prefix, suffix):
                 """chain two sets of copies 'prefix' and 'suffix'"""
                 result = prefix.copy()
                 for key, value in pycompat.iteritems(suffix):
                     result[key] = prefix.get(value, value)
                 return result
             def _tracefile(fctx, am, basemf):
                 """return file context that is the ancestor of fctx present in ancestor
                 manifest am
                 Note: we used to try and stop after a given limit, however checking if that
                 limit is reached turned out to be very expensive. we are better off
                 disabling that feature."""
                 for f in fctx.ancestors():
                     path = f.path()
                     if am.get(path, None) == f.filenode():
                         return path
                     if basemf and basemf.get(path, None) == f.filenode():
                         return path
             def _dirstatecopies(repo, match=None):
                 ds = repo.dirstate
                 c = ds.copies().copy()
                 for k in list(c):
                     if ds[k] not in b'anm' or (match and not match(k)):
                         del c[k]
                 return c
             def _computeforwardmissing(a, b, match=None):
                 """Computes which files are in b but not a.
                 This is its own function so extensions can easily wrap this call to see what
                 files _forwardcopies is about to process.
                 """
                 ma = a.manifest()
                 mb = b.manifest()
                 return mb.filesnotin(ma, match=match)
             def usechangesetcentricalgo(repo):
                 """Checks if we should use changeset-centric copy algorithms"""
                 if repo.filecopiesmode == b'changeset-sidedata':
                     return True
                 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
                 changesetsource = (b'changeset-only', b'compatibility')
                 return readfrom in changesetsource
             def _committedforwardcopies(a, b, base, match):
                 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 repo = a._repo
                 if usechangesetcentricalgo(repo):
                     return _changesetforwardcopies(a, b, match)
                 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
                 dbg = repo.ui.debug
                 if debug:
                     dbg(b'debug.copies:    looking into rename from %s to %s\n' % (a, b))
                 am = a.manifest()
                 basemf = None if base is None else base.manifest()
                 # find where new files came from
                 # we currently don't try to find where old files went, too expensive
                 # this means we can miss a case like 'hg rm b; hg cp a b'
                 cm = {}
                 # Computing the forward missing is quite expensive on large manifests, since
                 # it compares the entire manifests. We can optimize it in the common use
                 # case of computing what copies are in a commit versus its parent (like
                 # during a rebase or histedit). Note, we exclude merge commits from this
                 # optimization, since the ctx.files() for a merge commit is not correct for
                 # this comparison.
                 forwardmissingmatch = match
                 if b.p1() == a and b.p2().node() == nullid:
                     filesmatcher = matchmod.exact(b.files())
                     forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
                 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
                 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
                 if debug:
                     dbg(b'debug.copies:      missing files to search: %d\n' % len(missing))
                 for f in sorted(missing):
                     if debug:
                         dbg(b'debug.copies:        tracing file: %s\n' % f)
                     fctx = b[f]
                     fctx._ancestrycontext = ancestrycontext
                     if debug:
                         start = util.timer()
                     opath = _tracefile(fctx, am, basemf)
                     if opath:
                         if debug:
                             dbg(b'debug.copies:          rename of: %s\n' % opath)
                         cm[f] = opath
                     if debug:
                         dbg(
                             b'debug.copies:          time: %f seconds\n'
                             % (util.timer() - start)
                         )
                 return cm
             def _revinfo_getter(repo, match):
                 """returns a function that returns the following data given a <rev>"
                 * p1: revision number of first parent
                 * p2: revision number of first parent
                 * changes: a ChangingFiles object
                 """
                 cl = repo.changelog
                 parents = cl.parentrevs
                 flags = cl.flags
                 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
                 changelogrevision = cl.changelogrevision
                 if rustmod is not None:
                     def revinfo(rev):
                         p1, p2 = parents(rev)
                         if flags(rev) & HASCOPIESINFO:
                             raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
                         else:
                             raw = None
                         return (p1, p2, raw)
                 else:
                     def revinfo(rev):
                         p1, p2 = parents(rev)
                         if flags(rev) & HASCOPIESINFO:
                             changes = changelogrevision(rev).changes
                         else:
                             changes = None
                         return (p1, p2, changes)
                 return revinfo
             def cached_is_ancestor(is_ancestor):
                 """return a cached version of is_ancestor"""
                 cache = {}
                 def _is_ancestor(anc, desc):
                     if anc > desc:
                         return False
                     elif anc == desc:
                         return True
                     key = (anc, desc)
                     ret = cache.get(key)
                     if ret is None:
                         ret = cache[key] = is_ancestor(anc, desc)
                     return ret
                 return _is_ancestor
             def _changesetforwardcopies(a, b, match):
                 if a.rev() in (nullrev, b.rev()):
                     return {}
                 repo = a.repo().unfiltered()
                 children = {}
                 cl = repo.changelog
                 isancestor = cl.isancestorrev
                 # To track rename from "A" to B, we need to gather all parent → children
                 # edges that are contains in `::B` but not in `::A`.
                 #
                 #
                 # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
                 # ::a`) and also all the "roots point", ie the parents of the exclusive set
                 # that belong to ::a. These are exactly all the revisions needed to express
                 # the parent → children we need to combine.
                 #
                 # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
                 # excluding paths that leads to roots that are not ancestors of `a`. We
                 # keep this out of the explanation because it is hard enough without this special case..
                 parents = cl._uncheckedparentrevs
                 graph_roots = (nullrev, nullrev)
                 ancestors = cl.ancestors([a.rev()], inclusive=True)
                 revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
                 roots = set()
                 has_graph_roots = False
                 # iterate over `only(B, A)`
                 for r in revs:
                     ps = parents(r)
                     if ps == graph_roots:
                         has_graph_roots = True
                     else:
                         p1, p2 = ps
                         # find all the "root points" (see larger comment above)
                         if p1 != nullrev and p1 in ancestors:
                             roots.add(p1)
                         if p2 != nullrev and p2 in ancestors:
                             roots.add(p2)
                 if not roots:
                     # no common revision to track copies from
                     return {}
                 if has_graph_roots:
                     # this deal with the special case mentionned in the [1] footnotes. We
                     # must filter out revisions that leads to non-common graphroots.
                     roots = list(roots)
                     m = min(roots)
                     h = [b.rev()]
                     roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
                     roots_to_head = set(roots_to_head)
                     revs = [r for r in revs if r in roots_to_head]
                 if repo.filecopiesmode == b'changeset-sidedata':
                     # When using side-data, we will process the edges "from" the children.
                     # We iterate over the childre, gathering previous collected data for
                     # the parents. Do know when the parents data is no longer necessary, we
                     # keep a counter of how many children each revision has.
                     #
                     # An interresting property of `children_count` is that it only contains
                     # revision that will be relevant for a edge of the graph. So if a
                     # children has parent not in `children_count`, that edges should not be
                     # processed.
                     children_count = dict((r, 0) for r in roots)
                     for r in revs:
                         for p in cl.parentrevs(r):
                             if p == nullrev:
                                 continue
                             children_count[r] = 0
                             if p in children_count:
                                 children_count[p] += 1
                     revinfo = _revinfo_getter(repo, match)
                     return _combine_changeset_copies(
                         revs, children_count, b.rev(), revinfo, match, isancestor
                     )
                 else:
                     # When not using side-data, we will process the edges "from" the parent.
                     # so we need a full mapping of the parent -> children relation.
                     children = dict((r, []) for r in roots)
                     for r in revs:
                         for p in cl.parentrevs(r):
                             if p == nullrev:
                                 continue
                             children[r] = []
                             if p in children:
                                 children[p].append(r)
                     x = revs.pop()
                     assert x == b.rev()
                     revs.extend(roots)
                     revs.sort()
                     revinfo = _revinfo_getter_extra(repo)
                     return _combine_changeset_copies_extra(
                         revs, children, b.rev(), revinfo, match, isancestor
                     )
             def _combine_changeset_copies(
                 revs, children_count, targetrev, revinfo, match, isancestor
             ):
                 """combine the copies information for each item of iterrevs
                 revs: sorted iterable of revision to visit
                 children_count: a {parent: <number-of-relevant-children>} mapping.
                 targetrev: the final copies destination revision (not in iterrevs)
                 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
                 match: a matcher
                 It returns the aggregated copies information for `targetrev`.
                 """
                 alwaysmatch = match.always()
                 if rustmod is not None:
                     final_copies = rustmod.combine_changeset_copies(
                         list(revs), children_count, targetrev, revinfo, isancestor
                     )
                 else:
                     isancestor = cached_is_ancestor(isancestor)
                     all_copies = {}
                     # iterate over all the "children" side of copy tracing "edge"
                     for current_rev in revs:
                         p1, p2, changes = revinfo(current_rev)
                         current_copies = None
                         # iterate over all parents to chain the existing data with the
                         # data from the parent → child edge.
                         for parent, parent_rev in ((1, p1), (2, p2)):
                             if parent_rev == nullrev:
                                 continue
                             remaining_children = children_count.get(parent_rev)
                             if remaining_children is None:
                                 continue
                             remaining_children -= 1
                             children_count[parent_rev] = remaining_children
                             if remaining_children:
                                 copies = all_copies.get(parent_rev, None)
                             else:
                                 copies = all_copies.pop(parent_rev, None)
                             if copies is None:
                                 # this is a root
                                 copies = {}
                             newcopies = copies
                             # chain the data in the edge with the existing data
                             if changes is not None:
                                 childcopies = {}
                                 if parent == 1:
                                     childcopies = changes.copied_from_p1
                                 elif parent == 2:
                                     childcopies = changes.copied_from_p2
                                 if childcopies:
                                     newcopies = copies.copy()
                                     for dest, source in pycompat.iteritems(childcopies):
                                         prev = copies.get(source)
                                         if prev is not None and prev[1] is not None:
                                             source = prev[1]
                                         newcopies[dest] = (current_rev, source)
                                     assert newcopies is not copies
                                 if changes.removed:
                                     if newcopies is copies:
                                         newcopies = copies.copy()
                                     for f in changes.removed:
                                         if f in newcopies:
                                             if newcopies is copies:
                                                 # copy on write to avoid affecting potential other
                                                 # branches.  when there are no other branches, this
                                                 # could be avoided.
                                                 newcopies = copies.copy()
                                             newcopies[f] = (current_rev, None)
                             # check potential need to combine the data from another parent (for
                             # that child). See comment below for details.
                             if current_copies is None:
                                 current_copies = newcopies
                             elif current_copies is newcopies:
                                 # nothing to merge:
                                 pass
                             else:
                                 # we are the second parent to work on c, we need to merge our
                                 # work with the other.
                                 #
                                 # In case of conflict, parent 1 take precedence over parent 2.
                                 # This is an arbitrary choice made anew when implementing
                                 # changeset based copies. It was made without regards with
                                 # potential filelog related behavior.
                                 assert parent == 2
                                 current_copies = _merge_copies_dict(
                                     newcopies, current_copies, isancestor, changes
                                 )
                         all_copies[current_rev] = current_copies
                     # filter out internal details and return a {dest: source mapping}
                     final_copies = {}
                     for dest, (tt, source) in all_copies[targetrev].items():
                         if source is not None:
                             final_copies[dest] = source
                 if not alwaysmatch:
                     for filename in list(final_copies.keys()):
                         if not match(filename):
                             del final_copies[filename]
                 return final_copies
             # constant to decide which side to pick with _merge_copies_dict
             PICK_MINOR = 0
             PICK_MAJOR = 1
             PICK_EITHER = 2
             def _merge_copies_dict(minor, major, isancestor, changes):
                 """merge two copies-mapping together, minor and major
                 In case of conflict, value from "major" will be picked.
                 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
                                                     ancestors of `high_rev`,
                 - `ismerged(path)`: callable return True if `path` have been merged in the
                                     current revision,
                 return the resulting dict (in practice, the "minor" object, updated)
                 """
                 for dest, value in major.items():
                     other = minor.get(dest)
                     if other is None:
                         minor[dest] = value
                     else:
                         pick = _compare_values(changes, isancestor, dest, other, value)
                         if pick == PICK_MAJOR:
                             minor[dest] = value
                 return minor
-            def _compare_values(changes, isancestor, dest, other, value):
+            def _compare_values(changes, isancestor, dest, minor, major):
                 """compare two value within a _merge_copies_dict loop iteration"""
-                new_tt = value[0]
+                major_tt = major[0]
-                other_tt = other[0]
+                minor_tt = minor[0]
-                if value[1] == other[1]:
+                if major[1] == minor[1]:
                     return PICK_EITHER
                 # content from "major" wins, unless it is older
                 # than the branch point or there is a merge
-                if new_tt == other_tt:
+                if major_tt == minor_tt:
                     return PICK_MAJOR
-                elif changes is not None and value[1] is None and dest in changes.salvaged:
+                elif changes is not None and major[1] is None and dest in changes.salvaged:
                     return PICK_MINOR
-                elif changes is not None and other[1] is None and dest in changes.salvaged:
+                elif changes is not None and minor[1] is None and dest in changes.salvaged:
                     return PICK_MAJOR
                 elif changes is not None and dest in changes.merged:
                     return PICK_MAJOR
-                elif not isancestor(new_tt, other_tt):
+                elif not isancestor(major_tt, minor_tt):
-                    if value[1] is not None:
+                    if major[1] is not None:
                         return PICK_MAJOR
-                    elif isancestor(other_tt, new_tt):
+                    elif isancestor(minor_tt, major_tt):
                         return PICK_MAJOR
                 return PICK_MINOR
             def _revinfo_getter_extra(repo):
                 """return a function that return multiple data given a <rev>"i
                 * p1: revision number of first parent
                 * p2: revision number of first parent
                 * p1copies: mapping of copies from p1
                 * p2copies: mapping of copies from p2
                 * removed: a list of removed files
                 * ismerged: a callback to know if file was merged in that revision
                 """
                 cl = repo.changelog
                 parents = cl.parentrevs
                 def get_ismerged(rev):
                     ctx = repo[rev]
                     def ismerged(path):
                         if path not in ctx.files():
                             return False
                         fctx = ctx[path]
                         parents = fctx._filelog.parents(fctx._filenode)
                         nb_parents = 0
                         for n in parents:
                             if n != nullid:
                                 nb_parents += 1
                         return nb_parents >= 2
                     return ismerged
                 def revinfo(rev):
                     p1, p2 = parents(rev)
                     ctx = repo[rev]
                     p1copies, p2copies = ctx._copies
                     removed = ctx.filesremoved()
                     return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
                 return revinfo
             def _combine_changeset_copies_extra(
                 revs, children, targetrev, revinfo, match, isancestor
             ):
                 """version of `_combine_changeset_copies` that works with the Google
                 specific "extra" based storage for copy information"""
                 all_copies = {}
                 alwaysmatch = match.always()
                 for r in revs:
                     copies = all_copies.pop(r, None)
                     if copies is None:
                         # this is a root
                         copies = {}
                     for i, c in enumerate(children[r]):
                         p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
                         if r == p1:
                             parent = 1
                             childcopies = p1copies
                         else:
                             assert r == p2
                             parent = 2
                             childcopies = p2copies
                         if not alwaysmatch:
                             childcopies = {
                                 dst: src for dst, src in childcopies.items() if match(dst)
                             }
                         newcopies = copies
                         if childcopies:
                             newcopies = copies.copy()
                             for dest, source in pycompat.iteritems(childcopies):
                                 prev = copies.get(source)
                                 if prev is not None and prev[1] is not None:
                                     source = prev[1]
                                 newcopies[dest] = (c, source)
                             assert newcopies is not copies
                         for f in removed:
                             if f in newcopies:
                                 if newcopies is copies:
                                     # copy on write to avoid affecting potential other
                                     # branches.  when there are no other branches, this
                                     # could be avoided.
                                     newcopies = copies.copy()
                                 newcopies[f] = (c, None)
                         othercopies = all_copies.get(c)
                         if othercopies is None:
                             all_copies[c] = newcopies
                         else:
                             # we are the second parent to work on c, we need to merge our
                             # work with the other.
                             #
                             # In case of conflict, parent 1 take precedence over parent 2.
                             # This is an arbitrary choice made anew when implementing
                             # changeset based copies. It was made without regards with
                             # potential filelog related behavior.
                             if parent == 1:
                                 _merge_copies_dict_extra(
                                     othercopies, newcopies, isancestor, ismerged
                                 )
                             else:
                                 _merge_copies_dict_extra(
                                     newcopies, othercopies, isancestor, ismerged
                                 )
                                 all_copies[c] = newcopies
                 final_copies = {}
                 for dest, (tt, source) in all_copies[targetrev].items():
                     if source is not None:
                         final_copies[dest] = source
                 return final_copies
             def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
                 """version of `_merge_copies_dict` that works with the Google
                 specific "extra" based storage for copy information"""
                 for dest, value in major.items():
                     other = minor.get(dest)
                     if other is None:
                         minor[dest] = value
                     else:
                         new_tt = value[0]
                         other_tt = other[0]
                         if value[1] == other[1]:
                             continue
                         # content from "major" wins, unless it is older
                         # than the branch point or there is a merge
                         if (
                             new_tt == other_tt
                             or not isancestor(new_tt, other_tt)
                             or ismerged(dest)
                         ):
                             minor[dest] = value
             def _forwardcopies(a, b, base=None, match=None):
                 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
                 if base is None:
                     base = a
                 match = a.repo().narrowmatch(match)
                 # check for working copy
                 if b.rev() is None:
                     cm = _committedforwardcopies(a, b.p1(), base, match)
                     # combine copies from dirstate if necessary
                     copies = _chain(cm, _dirstatecopies(b._repo, match))
                 else:
                     copies = _committedforwardcopies(a, b, base, match)
                 return copies
             def _backwardrenames(a, b, match):
                 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
                     return {}
                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
                 # We don't want to pass in "match" here, since that would filter
                 # the destination by it. Since we're reversing the copies, we want
                 # to filter the source instead.
                 f = _forwardcopies(b, a)
                 r = {}
                 for k, v in sorted(pycompat.iteritems(f)):
                     if match and not match(v):
                         continue
                     # remove copies
                     if v in a:
                         continue
                     r[v] = k
                 return r
             def pathcopies(x, y, match=None):
                 """find {dst@y: src@x} copy mapping for directed compare"""
                 repo = x._repo
                 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
                 if debug:
                     repo.ui.debug(
                         b'debug.copies: searching copies from %s to %s\n' % (x, y)
                     )
                 if x == y or not x or not y:
                     return {}
                 if y.rev() is None and x == y.p1():
                     if debug:
                         repo.ui.debug(b'debug.copies: search mode: dirstate\n')
                     # short-circuit to avoid issues with merge states
                     return _dirstatecopies(repo, match)
                 a = y.ancestor(x)
                 if a == x:
                     if debug:
                         repo.ui.debug(b'debug.copies: search mode: forward\n')
                     copies = _forwardcopies(x, y, match=match)
                 elif a == y:
                     if debug:
                         repo.ui.debug(b'debug.copies: search mode: backward\n')
                     copies = _backwardrenames(x, y, match=match)
                 else:
                     if debug:
                         repo.ui.debug(b'debug.copies: search mode: combined\n')
                     base = None
                     if a.rev() != nullrev:
                         base = x
                     copies = _chain(
                         _backwardrenames(x, a, match=match),
                         _forwardcopies(a, y, base, match=match),
                     )
                 _filter(x, y, copies)
                 return copies
             def mergecopies(repo, c1, c2, base):
                 """
                 Finds moves and copies between context c1 and c2 that are relevant for
                 merging. 'base' will be used as the merge base.
                 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
                 files that were moved/ copied in one merge parent and modified in another.
                 For example:
                 o          ---> 4 another commit
                 |
                 |   o      ---> 3 commit that modifies a.txt
                 |  /
                 o /        ---> 2 commit that moves a.txt to b.txt
                 |/
                 o          ---> 1 merge base
                 If we try to rebase revision 3 on revision 4, since there is no a.txt in
                 revision 4, and if user have copytrace disabled, we prints the following
                 message:
                 ```other changed <file> which local deleted```
                 Returns a tuple where:
                 "branch_copies" an instance of branch_copies.
                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.
                 This function calls different copytracing algorithms based on config.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return branch_copies(), branch_copies(), {}
                 narrowmatch = c1.repo().narrowmatch()
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return (
                         branch_copies(_dirstatecopies(repo, narrowmatch)),
                         branch_copies(),
                         {},
                     )
                 copytracing = repo.ui.config(b'experimental', b'copytrace')
                 if stringutil.parsebool(copytracing) is False:
                     # stringutil.parsebool() returns None when it is unable to parse the
                     # value, so we should rely on making sure copytracing is on such cases
                     return branch_copies(), branch_copies(), {}
                 if usechangesetcentricalgo(repo):
                     # The heuristics don't make sense when we need changeset-centric algos
                     return _fullcopytracing(repo, c1, c2, base)
                 # Copy trace disabling is explicitly below the node == p1 logic above
                 # because the logic above is required for a simple copy to be kept across a
                 # rebase.
                 if copytracing == b'heuristics':
                     # Do full copytracing if only non-public revisions are involved as
                     # that will be fast enough and will also cover the copies which could
                     # be missed by heuristics
                     if _isfullcopytraceable(repo, c1, base):
                         return _fullcopytracing(repo, c1, c2, base)
                     return _heuristicscopytracing(repo, c1, c2, base)
                 else:
                     return _fullcopytracing(repo, c1, c2, base)
             def _isfullcopytraceable(repo, c1, base):
                 """Checks that if base, source and destination are all no-public branches,
                 if yes let's use the full copytrace algorithm for increased capabilities
                 since it will be fast enough.
                 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
                 number of changesets from c1 to base such that if number of changesets are
                 more than the limit, full copytracing algorithm won't be used.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c1.mutable() and base.mutable():
                     sourcecommitlimit = repo.ui.configint(
                         b'experimental', b'copytrace.sourcecommitlimit'
                     )
                     commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
                     return commits < sourcecommitlimit
                 return False
             def _checksinglesidecopies(
                 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
             ):
                 if src not in m2:
                     # deleted on side 2
                     if src not in m1:
                         # renamed on side 1, deleted on side 2
                         renamedelete[src] = dsts1
                 elif src not in mb:
                     # Work around the "short-circuit to avoid issues with merge states"
                     # thing in pathcopies(): pathcopies(x, y) can return a copy where the
                     # destination doesn't exist in y.
                     pass
                 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
                     return
                 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
                     # modified on side 2
                     for dst in dsts1:
                         copy[dst] = src
             class branch_copies(object):
                 """Information about copies made on one side of a merge/graft.
                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.
                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.
                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.
                 "dirmove" is a mapping of detected source dir -> destination dir renames.
                 This is needed for handling changes to new files previously grafted into
                 renamed directories.
                 """
                 def __init__(
                     self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
                 ):
                     self.copy = {} if copy is None else copy
                     self.renamedelete = {} if renamedelete is None else renamedelete
                     self.dirmove = {} if dirmove is None else dirmove
                     self.movewithdir = {} if movewithdir is None else movewithdir
                 def __repr__(self):
                     return '<branch_copies\n  copy=%r\n  renamedelete=%r\n  dirmove=%r\n  movewithdir=%r\n>' % (
                         self.copy,
                         self.renamedelete,
                         self.dirmove,
                         self.movewithdir,
                     )
             def _fullcopytracing(repo, c1, c2, base):
                 """The full copytracing algorithm which finds all the new files that were
                 added from merge base up to the top commit and for each file it checks if
                 this file was copied from another file.
                 This is pretty slow when a lot of changesets are involved but will track all
                 the copies.
                 """
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 mb = base.manifest()
                 copies1 = pathcopies(base, c1)
                 copies2 = pathcopies(base, c2)
                 if not (copies1 or copies2):
                     return branch_copies(), branch_copies(), {}
                 inversecopies1 = {}
                 inversecopies2 = {}
                 for dst, src in copies1.items():
                     inversecopies1.setdefault(src, []).append(dst)
                 for dst, src in copies2.items():
                     inversecopies2.setdefault(src, []).append(dst)
                 copy1 = {}
                 copy2 = {}
                 diverge = {}
                 renamedelete1 = {}
                 renamedelete2 = {}
                 allsources = set(inversecopies1) | set(inversecopies2)
                 for src in allsources:
                     dsts1 = inversecopies1.get(src)
                     dsts2 = inversecopies2.get(src)
                     if dsts1 and dsts2:
                         # copied/renamed on both sides
                         if src not in m1 and src not in m2:
                             # renamed on both sides
                             dsts1 = set(dsts1)
                             dsts2 = set(dsts2)
                             # If there's some overlap in the rename destinations, we
                             # consider it not divergent. For example, if side 1 copies 'a'
                             # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                             # and 'd' and deletes 'a'.
                             if dsts1 & dsts2:
                                 for dst in dsts1 & dsts2:
                                     copy1[dst] = src
                                     copy2[dst] = src
                             else:
                                 diverge[src] = sorted(dsts1 | dsts2)
                         elif src in m1 and src in m2:
                             # copied on both sides
                             dsts1 = set(dsts1)
                             dsts2 = set(dsts2)
                             for dst in dsts1 & dsts2:
                                 copy1[dst] = src
                                 copy2[dst] = src
                         # TODO: Handle cases where it was renamed on one side and copied
                         # on the other side
                     elif dsts1:
                         # copied/renamed only on side 1
                         _checksinglesidecopies(
                             src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
                         )
                     elif dsts2:
                         # copied/renamed only on side 2
                         _checksinglesidecopies(
                             src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
                         )
                 # find interesting file sets from manifests
                 cache = []
                 def _get_addedfiles(idx):
                     if not cache:
                         addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
                         addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
                         u1 = sorted(addedinm1 - addedinm2)
                         u2 = sorted(addedinm2 - addedinm1)
                         cache.extend((u1, u2))
                     return cache[idx]
                 u1fn = lambda: _get_addedfiles(0)
                 u2fn = lambda: _get_addedfiles(1)
                 if repo.ui.debugflag:
                     u1 = u1fn()
                     u2 = u2fn()
                     header = b"  unmatched files in %s"
                     if u1:
                         repo.ui.debug(
                             b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1))
                         )
                     if u2:
                         repo.ui.debug(
                             b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2))
                         )
                     renamedeleteset = set()
                     divergeset = set()
                     for dsts in diverge.values():
                         divergeset.update(dsts)
                     for dsts in renamedelete1.values():
                         renamedeleteset.update(dsts)
                     for dsts in renamedelete2.values():
                         renamedeleteset.update(dsts)
                     repo.ui.debug(
                         b"  all copies found (* = to merge, ! = divergent, "
                         b"% = renamed and deleted):\n"
                     )
                     for side, copies in ((b"local", copies1), (b"remote", copies2)):
                         if not copies:
                             continue
                         repo.ui.debug(b"   on %s side:\n" % side)
                         for f in sorted(copies):
                             note = b""
                             if f in copy1 or f in copy2:
                                 note += b"*"
                             if f in divergeset:
                                 note += b"!"
                             if f in renamedeleteset:
                                 note += b"%"
                             repo.ui.debug(
                                 b"    src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                             )
                     del renamedeleteset
                     del divergeset
                 repo.ui.debug(b"  checking for directory renames\n")
                 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
                 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
                 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
                 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
                 return branch_copies1, branch_copies2, diverge
             def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
                 """Finds moved directories and files that should move with them.
                 ctx: the context for one of the sides
                 copy: files copied on the same side (as ctx)
                 fullcopy: files copied on the same side (as ctx), including those that
                           merge.manifestmerge() won't care about
                 addedfilesfn: function returning added files on the other side (compared to
                               ctx)
                 """
                 # generate a directory move map
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in pycompat.iteritems(fullcopy):
                     dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc)
                     elif dsrc in dirmove and dirmove[dsrc] != ddst:
                         # files from the same directory moved to two different places
                         invalid.add(dsrc)
                     else:
                         # looks good so far
                         dirmove[dsrc] = ddst
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del invalid
                 if not dirmove:
                     return {}, {}
                 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
                 for d in dirmove:
                     repo.ui.debug(
                         b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
                     )
                 movewithdir = {}
                 # check unaccounted nonoverlapping files against directory moves
                 for f in addedfilesfn():
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d) :]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(
                                         b"   pending file src: '%s' -> dst: '%s'\n"
                                         % (f, df)
                                     )
                                 break
                 return dirmove, movewithdir
             def _heuristicscopytracing(repo, c1, c2, base):
                 """Fast copytracing using filename heuristics
                 Assumes that moves or renames are of following two types:
 ) Inside a directory only (same directory name but different filenames)
 ) Move from one directory to another
                                 (same filenames but different directory names)
                 Works only when there are no merge commits in the "source branch".
                 Source branch is commits from base up to c2 not including base.
                 If merge is involved it fallbacks to _fullcopytracing().
                 Can be used by setting the following config:
                     [experimental]
                     copytrace = heuristics
                 In some cases the copy/move candidates found by heuristics can be very large
                 in number and that will make the algorithm slow. The number of possible
                 candidates to check can be limited by using the config
                 `experimental.copytrace.movecandidateslimit` which defaults to 100.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c2.rev() is None:
                     c2 = c2.p1()
                 changedfiles = set()
                 m1 = c1.manifest()
                 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
                     # If base is not in c2 branch, we switch to fullcopytracing
                     repo.ui.debug(
                         b"switching to full copytracing as base is not "
                         b"an ancestor of c2\n"
                     )
                     return _fullcopytracing(repo, c1, c2, base)
                 ctx = c2
                 while ctx != base:
                     if len(ctx.parents()) == 2:
                         # To keep things simple let's not handle merges
                         repo.ui.debug(b"switching to full copytracing because of merges\n")
                         return _fullcopytracing(repo, c1, c2, base)
                     changedfiles.update(ctx.files())
                     ctx = ctx.p1()
                 copies2 = {}
                 cp = _forwardcopies(base, c2)
                 for dst, src in pycompat.iteritems(cp):
                     if src in m1:
                         copies2[dst] = src
                 # file is missing if it isn't present in the destination, but is present in
                 # the base and present in the source.
                 # Presence in the base is important to exclude added files, presence in the
                 # source is important to exclude removed files.
                 filt = lambda f: f not in m1 and f in base and f in c2
                 missingfiles = [f for f in changedfiles if filt(f)]
                 copies1 = {}
                 if missingfiles:
                     basenametofilename = collections.defaultdict(list)
                     dirnametofilename = collections.defaultdict(list)
                     for f in m1.filesnotin(base.manifest()):
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         basenametofilename[basename].append(f)
                         dirnametofilename[dirname].append(f)
                     for f in missingfiles:
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         samebasename = basenametofilename[basename]
                         samedirname = dirnametofilename[dirname]
                         movecandidates = samebasename + samedirname
                         # f is guaranteed to be present in c2, that's why
                         # c2.filectx(f) won't fail
                         f2 = c2.filectx(f)
                         # we can have a lot of candidates which can slow down the heuristics
                         # config value to limit the number of candidates moves to check
                         maxcandidates = repo.ui.configint(
                             b'experimental', b'copytrace.movecandidateslimit'
                         )
                         if len(movecandidates) > maxcandidates:
                             repo.ui.status(
                                 _(
                                     b"skipping copytracing for '%s', more "
                                     b"candidates than the limit: %d\n"
                                 )
                                 % (f, len(movecandidates))
                             )
                             continue
                         for candidate in movecandidates:
                             f1 = c1.filectx(candidate)
                             if _related(f1, f2):
                                 # if there are a few related copies then we'll merge
                                 # changes into all of them. This matches the behaviour
                                 # of upstream copytracing
                                 copies1[candidate] = f
                 return branch_copies(copies1), branch_copies(copies2), {}
             def _related(f1, f2):
                 """return True if f1 and f2 filectx have a common ancestor
                 Walk back to common ancestor to see if the two files originate
                 from the same file. Since workingfilectx's rev() is None it messes
                 up the integer comparison logic, hence the pre-step check for
                 None (f1 and f2 can only be workingfilectx's initially).
                 """
                 if f1 == f2:
                     return True  # a match
                 g1, g2 = f1.ancestors(), f2.ancestors()
                 try:
                     f1r, f2r = f1.linkrev(), f2.linkrev()
                     if f1r is None:
                         f1 = next(g1)
                     if f2r is None:
                         f2 = next(g2)
                     while True:
                         f1r, f2r = f1.linkrev(), f2.linkrev()
                         if f1r > f2r:
                             f1 = next(g1)
                         elif f2r > f1r:
                             f2 = next(g2)
                         else:  # f1 and f2 point to files in the same linkrev
                             return f1 == f2  # true if they point to the same file
                 except StopIteration:
                     return False
             def graftcopies(wctx, ctx, base):
                 """reproduce copies between base and ctx in the wctx
                 Unlike mergecopies(), this function will only consider copies between base
                 and ctx; it will ignore copies between base and wctx. Also unlike
                 mergecopies(), this function will apply copies to the working copy (instead
                 of just returning information about the copies). That makes it cheaper
                 (especially in the common case of base==ctx.p1()) and useful also when
                 experimental.copytrace=off.
                 merge.update() will have already marked most copies, but it will only
                 mark copies if it thinks the source files are related (see
                 merge._related()). It will also not mark copies if the file wasn't modified
                 on the local side. This function adds the copies that were "missed"
                 by merge.update().
                 """
                 new_copies = pathcopies(base, ctx)
                 _filter(wctx.p1(), wctx, new_copies)
                 for dst, src in pycompat.iteritems(new_copies):
                     wctx[dst].markcopied(src)