##// END OF EJS Templates
copies: explicitly filter out existing file in graftcopies...
marmoute -
r47126:f213b250 default
parent child Browse files
Show More
@@ -1,1225 +1,1231 b''
1 # coding: utf8
1 # coding: utf8
2 # copies.py - copy detection for Mercurial
2 # copies.py - copy detection for Mercurial
3 #
3 #
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import collections
11 import collections
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15 from .node import (
15 from .node import (
16 nullid,
16 nullid,
17 nullrev,
17 nullrev,
18 )
18 )
19
19
20 from . import (
20 from . import (
21 match as matchmod,
21 match as matchmod,
22 pathutil,
22 pathutil,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28
28
29 from .utils import stringutil
29 from .utils import stringutil
30
30
31 from .revlogutils import (
31 from .revlogutils import (
32 flagutil,
32 flagutil,
33 sidedata as sidedatamod,
33 sidedata as sidedatamod,
34 )
34 )
35
35
36 rustmod = policy.importrust("copy_tracing")
36 rustmod = policy.importrust("copy_tracing")
37
37
38
38
39 def _filter(src, dst, t):
39 def _filter(src, dst, t):
40 """filters out invalid copies after chaining"""
40 """filters out invalid copies after chaining"""
41
41
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
44 # in the following table (not including trivial cases). For example, case 6
44 # in the following table (not including trivial cases). For example, case 6
45 # is where a file existed in 'src' and remained under that name in 'mid' and
45 # is where a file existed in 'src' and remained under that name in 'mid' and
46 # then was renamed between 'mid' and 'dst'.
46 # then was renamed between 'mid' and 'dst'.
47 #
47 #
48 # case src mid dst result
48 # case src mid dst result
49 # 1 x y - -
49 # 1 x y - -
50 # 2 x y y x->y
50 # 2 x y y x->y
51 # 3 x y x -
51 # 3 x y x -
52 # 4 x y z x->z
52 # 4 x y z x->z
53 # 5 - x y -
53 # 5 - x y -
54 # 6 x x y x->y
54 # 6 x x y x->y
55 #
55 #
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
58 # between 5 and 6, so it includes all cases in its result.
58 # between 5 and 6, so it includes all cases in its result.
59 # Cases 1, 3, and 5 are then removed by _filter().
59 # Cases 1, 3, and 5 are then removed by _filter().
60
60
61 for k, v in list(t.items()):
61 for k, v in list(t.items()):
62 # remove copies from files that didn't exist
62 # remove copies from files that didn't exist
63 if v not in src:
63 if v not in src:
64 del t[k]
64 del t[k]
65 # remove criss-crossed copies
65 # remove criss-crossed copies
66 elif k in src and v in dst:
66 elif k in src and v in dst:
67 del t[k]
67 del t[k]
68 # remove copies to files that were then removed
68 # remove copies to files that were then removed
69 elif k not in dst:
69 elif k not in dst:
70 del t[k]
70 del t[k]
71
71
72
72
def _chain(prefix, suffix):
    """Chain two copy mappings: 'prefix' applied first, then 'suffix'.

    A destination in ``suffix`` whose source is itself a destination in
    ``prefix`` is resolved back through ``prefix``, so x->y chained with
    y->z yields x->z in the result.
    """
    chained = prefix.copy()
    for dest, src in pycompat.iteritems(suffix):
        # resolve the source through 'prefix' when it was itself copied
        chained[dest] = prefix.get(src, src)
    return chained
79
79
80
80
81 def _tracefile(fctx, am, basemf):
81 def _tracefile(fctx, am, basemf):
82 """return file context that is the ancestor of fctx present in ancestor
82 """return file context that is the ancestor of fctx present in ancestor
83 manifest am
83 manifest am
84
84
85 Note: we used to try and stop after a given limit, however checking if that
85 Note: we used to try and stop after a given limit, however checking if that
86 limit is reached turned out to be very expensive. we are better off
86 limit is reached turned out to be very expensive. we are better off
87 disabling that feature."""
87 disabling that feature."""
88
88
89 for f in fctx.ancestors():
89 for f in fctx.ancestors():
90 path = f.path()
90 path = f.path()
91 if am.get(path, None) == f.filenode():
91 if am.get(path, None) == f.filenode():
92 return path
92 return path
93 if basemf and basemf.get(path, None) == f.filenode():
93 if basemf and basemf.get(path, None) == f.filenode():
94 return path
94 return path
95
95
96
96
97 def _dirstatecopies(repo, match=None):
97 def _dirstatecopies(repo, match=None):
98 ds = repo.dirstate
98 ds = repo.dirstate
99 c = ds.copies().copy()
99 c = ds.copies().copy()
100 for k in list(c):
100 for k in list(c):
101 if ds[k] not in b'anm' or (match and not match(k)):
101 if ds[k] not in b'anm' or (match and not match(k)):
102 del c[k]
102 del c[k]
103 return c
103 return c
104
104
105
105
106 def _computeforwardmissing(a, b, match=None):
106 def _computeforwardmissing(a, b, match=None):
107 """Computes which files are in b but not a.
107 """Computes which files are in b but not a.
108 This is its own function so extensions can easily wrap this call to see what
108 This is its own function so extensions can easily wrap this call to see what
109 files _forwardcopies is about to process.
109 files _forwardcopies is about to process.
110 """
110 """
111 ma = a.manifest()
111 ma = a.manifest()
112 mb = b.manifest()
112 mb = b.manifest()
113 return mb.filesnotin(ma, match=match)
113 return mb.filesnotin(ma, match=match)
114
114
115
115
def usechangesetcentricalgo(repo):
    """Return True when changeset-centric copy algorithms should be used."""
    # side-data storage always implies the changeset-centric algorithms
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    source = repo.ui.config(b'experimental', b'copies.read-from')
    return source in (b'changeset-only', b'compatibility')
123
123
124
124
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    ``base`` is an optional extra context whose manifest is also accepted
    as a stop point when tracing files back (see _tracefile); ``match``
    restricts the set of files considered.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    # verbose tracing is gated behind the devel.debug.copies config knob
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # share a single ancestry context across all traced files to amortize
    # the ancestry computation
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
182
182
183
183
def _revinfo_getter(repo, match):
    """returns a function that returns the following data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object, or the raw sidedata bytes when the
      rust extension is in use, or None when the revision carries no copy
      information

    Note: ``match`` is unused in this function; it is kept for interface
    symmetry with _revinfo_getter_extra.
    """
    cl = repo.changelog
    # bind the changelog accessors to locals once; the returned closure is
    # called for every visited revision
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    if rustmod is not None:

        # the rust code consumes the raw sidedata bytes and does its own
        # decoding, so avoid building a ChangingFiles object here
        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
            else:
                raw = None
            return (p1, p2, raw)

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                changes = changelogrevision(rev).changes
            else:
                changes = None
            return (p1, p2, changes)

    return revinfo
220
220
221
221
def cached_is_ancestor(is_ancestor):
    """Wrap ``is_ancestor`` with memoization.

    A revision number strictly greater than another can never be its
    ancestor, and a revision is trivially its own ancestor; those answers
    are computed inline and only the remaining queries are cached.
    """
    memo = {}

    def _is_ancestor(anc, desc):
        # cheap orderings never reach the cache
        if anc > desc:
            return False
        if anc == desc:
            return True
        key = (anc, desc)
        cached = memo.get(key)
        if cached is None:
            cached = memo[key] = is_ancestor(anc, desc)
        return cached

    return _is_ancestor
238
238
239
239
def _changesetforwardcopies(a, b, match):
    """Compute the copies between changesets ``a`` and ``b`` using
    changeset-stored copy information.

    Returns an empty dict when there is nothing to trace (``a`` is the
    null revision or equal to ``b``, or no common root exists).
    """
    if a.rev() in (nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev

    # To track rename from "A" to B, we need to gather all parent -> children
    # edges that are contained in `::B` but not in `::A`.
    #
    #
    # To do so, we need to gather all revisions exclusive[1] to "B" (ie[1]:
    # `::b - ::a`) and also all the "root points", ie the parents of the
    # exclusive set that belong to ::a. These are exactly all the revisions
    # needed to express the parent -> children edges we need to combine.
    #
    # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
    # excluding paths that lead to roots that are not ancestors of `a`. We
    # keep this out of the explanation because it is hard enough without
    # this special case..

    parents = cl._uncheckedparentrevs
    graph_roots = (nullrev, nullrev)

    ancestors = cl.ancestors([a.rev()], inclusive=True)
    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    roots = set()
    has_graph_roots = False

    # iterate over `only(B, A)`
    for r in revs:
        ps = parents(r)
        if ps == graph_roots:
            has_graph_roots = True
        else:
            p1, p2 = ps

            # find all the "root points" (see larger comment above)
            if p1 != nullrev and p1 in ancestors:
                roots.add(p1)
            if p2 != nullrev and p2 in ancestors:
                roots.add(p2)
    if not roots:
        # no common revision to track copies from
        return {}
    if has_graph_roots:
        # this deals with the special case mentioned in the [1] footnote.
        # We must filter out revisions that lead to non-common graph roots.
        roots = list(roots)
        m = min(roots)
        h = [b.rev()]
        roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
        roots_to_head = set(roots_to_head)
        revs = [r for r in revs if r in roots_to_head]

    if repo.filecopiesmode == b'changeset-sidedata':
        # When using side-data, we will process the edges "from" the children.
        # We iterate over the children, gathering previously collected data
        # for the parents. To know when the parents' data is no longer
        # necessary, we keep a counter of how many children each revision has.
        #
        # An interesting property of `children_count` is that it only contains
        # revisions that will be relevant for an edge of the graph. So if a
        # child has a parent not in `children_count`, that edge should not be
        # processed.
        children_count = dict((r, 0) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children_count[r] = 0
                if p in children_count:
                    children_count[p] += 1
        revinfo = _revinfo_getter(repo, match)
        return _combine_changeset_copies(
            revs, children_count, b.rev(), revinfo, match, isancestor
        )
    else:
        # When not using side-data, we will process the edges "from" the
        # parent, so we need a full mapping of the parent -> children relation.
        children = dict((r, []) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children[r] = []
                if p in children:
                    children[p].append(r)
        x = revs.pop()
        assert x == b.rev()
        revs.extend(roots)
        revs.sort()

        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )
339
339
340
340
def _combine_changeset_copies(
    revs, children_count, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children_count: a {parent: <number-of-relevant-children>} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that returns a (p1, p2, changes) tuple for a
        revision (see _revinfo_getter)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    if rustmod is not None:
        # delegate the whole graph walk to the compiled implementation
        final_copies = rustmod.combine_changeset_copies(
            list(revs), children_count, targetrev, revinfo, isancestor
        )
    else:
        isancestor = cached_is_ancestor(isancestor)

        # maps each visited revision to its {dest: (rev, source)} data;
        # entries are dropped once their last child has consumed them
        all_copies = {}
        # iterate over all the "children" side of copy tracing "edge"
        for current_rev in revs:
            p1, p2, changes = revinfo(current_rev)
            current_copies = None
            # iterate over all parents to chain the existing data with the
            # data from the parent -> child edge.
            for parent, parent_rev in ((1, p1), (2, p2)):
                if parent_rev == nullrev:
                    continue
                remaining_children = children_count.get(parent_rev)
                if remaining_children is None:
                    # parent outside the relevant graph, edge not processed
                    continue
                remaining_children -= 1
                children_count[parent_rev] = remaining_children
                if remaining_children:
                    copies = all_copies.get(parent_rev, None)
                else:
                    # we are the last child: take ownership of the data
                    copies = all_copies.pop(parent_rev, None)

                if copies is None:
                    # this is a root
                    newcopies = copies = {}
                elif remaining_children:
                    # other children still need the parent data untouched
                    newcopies = copies.copy()
                else:
                    newcopies = copies
                # chain the data in the edge with the existing data
                if changes is not None:
                    childcopies = {}
                    if parent == 1:
                        childcopies = changes.copied_from_p1
                    elif parent == 2:
                        childcopies = changes.copied_from_p2

                    if childcopies:
                        newcopies = copies.copy()
                        for dest, source in pycompat.iteritems(childcopies):
                            prev = copies.get(source)
                            if prev is not None and prev[1] is not None:
                                # transitive rename: keep the original source
                                source = prev[1]
                            newcopies[dest] = (current_rev, source)
                        assert newcopies is not copies
                    if changes.removed:
                        for f in changes.removed:
                            if f in newcopies:
                                if newcopies is copies:
                                    # copy on write to avoid affecting potential other
                                    # branches. when there are no other branches, this
                                    # could be avoided.
                                    newcopies = copies.copy()
                                # a None source records the removal
                                newcopies[f] = (current_rev, None)
                # check potential need to combine the data from another parent (for
                # that child). See comment below for details.
                if current_copies is None:
                    current_copies = newcopies
                else:
                    # we are the second parent to work on c, we need to merge our
                    # work with the other.
                    #
                    # In case of conflict, parent 1 take precedence over parent 2.
                    # This is an arbitrary choice made anew when implementing
                    # changeset based copies. It was made without regards with
                    # potential filelog related behavior.
                    assert parent == 2
                    current_copies = _merge_copies_dict(
                        newcopies, current_copies, isancestor, changes
                    )
            all_copies[current_rev] = current_copies

        # filter out internal details and return a {dest: source mapping}
        final_copies = {}
        for dest, (tt, source) in all_copies[targetrev].items():
            if source is not None:
                final_copies[dest] = source
    if not alwaysmatch:
        # narrow the result to files accepted by the matcher
        for filename in list(final_copies.keys()):
            if not match(filename):
                del final_copies[filename]
    return final_copies
444
444
445
445
# constants returned by _compare_values to decide which side
# _merge_copies_dict should pick for a conflicting destination
PICK_MINOR = 0
PICK_MAJOR = 1
PICK_EITHER = 2
450
450
451
451
def _merge_copies_dict(minor, major, isancestor, changes):
    """merge two copies-mapping together, minor and major

    In case of conflict, value from "major" will be picked.

    - `isancestor(low_rev, high_rev)`: callable, return True if `low_rev`
      is an ancestor of `high_rev`,

    - `changes`: a ChangingFiles object for the current revision (or
      None); it is forwarded to _compare_values() to arbitrate conflicts,

    return the resulting dict (in practice, the "minor" object, updated)
    """
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            # no conflict: simply adopt the "major" entry
            minor[dest] = value
        else:
            # both sides know about `dest`: let _compare_values arbitrate
            pick = _compare_values(changes, isancestor, dest, other, value)
            if pick == PICK_MAJOR:
                minor[dest] = value
    return minor
474
474
475
475
def _compare_values(changes, isancestor, dest, minor, major):
    """compare two value within a _merge_copies_dict loop iteration

    ``minor`` and ``major`` are ``(rev, source)`` tuples where a None
    ``source`` marks a removal. Returns PICK_MINOR, PICK_MAJOR or
    PICK_EITHER. The order of the checks below is significant.
    """
    major_tt, major_value = major
    minor_tt, minor_value = minor

    # evacuate some simple case first:
    if major_tt == minor_tt:
        # if it comes from the same revision it must be the same value
        assert major_value == minor_value
        return PICK_EITHER
    elif major[1] == minor[1]:
        # same copy source (or both are removal markers): either side works
        return PICK_EITHER

    # actual merging needed: content from "major" wins, unless it is older than
    # the branch point or there is a merge
    elif changes is not None and major[1] is None and dest in changes.salvaged:
        # major side removed the file but it was salvaged: keep minor's copy
        return PICK_MINOR
    elif changes is not None and minor[1] is None and dest in changes.salvaged:
        return PICK_MAJOR
    elif changes is not None and dest in changes.merged:
        return PICK_MAJOR
    elif not isancestor(major_tt, minor_tt):
        if major[1] is not None:
            # major's entry is not outdated by minor's, and it is a real
            # copy (not a removal): it wins
            return PICK_MAJOR
    elif isancestor(minor_tt, major_tt):
        return PICK_MAJOR
    return PICK_MINOR
503
503
504
504
def _revinfo_getter_extra(repo):
    """return a function that returns multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        # build a `path -> bool` callback bound to revision `rev`
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            # NOTE: this local `parents` deliberately shadows the outer
            # changelog `parents`; here it holds the filelog parent nodes
            parents = fctx._filelog.parents(fctx._filenode)
            nb_parents = 0
            for n in parents:
                if n != nullid:
                    nb_parents += 1
            # a file revision with two real parents was merged
            return nb_parents >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo
542
542
543
543
def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information"""
    # Propagate each revision's copy mapping down to its children, merging
    # the two incoming mappings whenever a child is a merge commit.
    all_copies = {}
    alwaysmatch = match.always()
    for rev in revs:
        # mapping accumulated so far for `rev`; roots start from scratch
        copies = all_copies.pop(rev, None)
        if copies is None:
            copies = {}
        for child in children[rev]:
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(child)
            if rev == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert rev == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                # keep only copies whose destination the matcher accepts
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        # chain through an earlier copy to the original source
                        source = prev[1]
                    newcopies[dest] = (child, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    # a None source records the deletion; dropped at the end
                    newcopies[f] = (child, None)
            othercopies = all_copies.get(child)
            if othercopies is None:
                all_copies[child] = newcopies
            else:
                # we are the second parent to work on this child, so merge
                # our mapping with the one computed from the other parent.
                #
                # In case of conflict, parent 1 takes precedence over parent
                # 2. This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                all_copies[child] = newcopies

    # drop entries that record deletions (source is None)
    return {
        dest: source
        for dest, (tt, source) in all_copies[targetrev].items()
        if source is not None
    }
612
612
613
613
614 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
614 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
615 """version of `_merge_copies_dict` that works with the Google
615 """version of `_merge_copies_dict` that works with the Google
616 specific "extra" based storage for copy information"""
616 specific "extra" based storage for copy information"""
617 for dest, value in major.items():
617 for dest, value in major.items():
618 other = minor.get(dest)
618 other = minor.get(dest)
619 if other is None:
619 if other is None:
620 minor[dest] = value
620 minor[dest] = value
621 else:
621 else:
622 new_tt = value[0]
622 new_tt = value[0]
623 other_tt = other[0]
623 other_tt = other[0]
624 if value[1] == other[1]:
624 if value[1] == other[1]:
625 continue
625 continue
626 # content from "major" wins, unless it is older
626 # content from "major" wins, unless it is older
627 # than the branch point or there is a merge
627 # than the branch point or there is a merge
628 if (
628 if (
629 new_tt == other_tt
629 new_tt == other_tt
630 or not isancestor(new_tt, other_tt)
630 or not isancestor(new_tt, other_tt)
631 or ismerged(dest)
631 or ismerged(dest)
632 ):
632 ):
633 minor[dest] = value
633 minor[dest] = value
634
634
635
635
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        # both endpoints are committed revisions
        return _committedforwardcopies(a, b, base, match)
    # 'b' is the working copy: trace up to its first parent, then layer the
    # dirstate copies on top
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
650
650
651
651
def _backwardrenames(a, b, match):
    """find {dst@b: src@a} renames by reversing the forward copies

    Only true renames are kept: entries whose source still exists in `a`
    (i.e. plain copies) are dropped.
    """
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    renames = {}
    for dst_in_a, src_in_b in sorted(pycompat.iteritems(forward)):
        if match and not match(src_in_b):
            continue
        if src_in_b in a:
            # source still present in 'a': a copy, not a rename
            continue
        renames[src_in_b] = dst_in_a
    return renames
672
672
673
673
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        # identical or empty endpoints: nothing can have been copied
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: walk forward only
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif anc == y:
        # y is an ancestor of x: walk backward only
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # unrelated branches: go back to the common ancestor, then forward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = x if anc.rev() != nullrev else None
        copies = _chain(
            _backwardrenames(x, anc, match=match),
            _forwardcopies(anc, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
710
710
711
711
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing != b'heuristics':
        return _fullcopytracing(repo, c1, c2, base)
    # Do full copytracing if only non-public revisions are involved as
    # that will be fast enough and will also cover the copies which could
    # be missed by heuristics
    if _isfullcopytraceable(repo, c1, base):
        return _fullcopytracing(repo, c1, c2, base)
    return _heuristicscopytracing(repo, c1, c2, base)
780
780
781
781
def _isfullcopytraceable(repo, c1, base):
    """Checks that if base, source and destination are all no-public branches,
    if yes let's use the full copytrace algorithm for increased capabilities
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        # working copy: judge by its first parent instead
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        # at least one endpoint is public: stick with heuristics
        return False
    sourcecommitlimit = repo.ui.configint(
        b'experimental', b'copytrace.sourcecommitlimit'
    )
    commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
    return commits < sourcecommitlimit
800
800
801
801
802 def _checksinglesidecopies(
802 def _checksinglesidecopies(
803 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
803 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
804 ):
804 ):
805 if src not in m2:
805 if src not in m2:
806 # deleted on side 2
806 # deleted on side 2
807 if src not in m1:
807 if src not in m1:
808 # renamed on side 1, deleted on side 2
808 # renamed on side 1, deleted on side 2
809 renamedelete[src] = dsts1
809 renamedelete[src] = dsts1
810 elif src not in mb:
810 elif src not in mb:
811 # Work around the "short-circuit to avoid issues with merge states"
811 # Work around the "short-circuit to avoid issues with merge states"
812 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
812 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
813 # destination doesn't exist in y.
813 # destination doesn't exist in y.
814 pass
814 pass
815 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
815 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
816 return
816 return
817 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
817 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
818 # modified on side 2
818 # modified on side 2
819 for dst in dsts1:
819 for dst in dsts1:
820 copy[dst] = src
820 copy[dst] = src
821
821
822
822
class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # each mapping defaults to a fresh empty dict (avoids the shared
        # mutable-default pitfall)
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        return '<branch_copies\n  copy=%r\n  renamedelete=%r\n  dirmove=%r\n  movewithdir=%r\n>' % (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
857
857
858
858
def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        # neither side copied anything: nothing to analyse
        return branch_copies(), branch_copies(), {}

    # invert the {dst: src} mappings into {src: [dst, ...]}
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    cache = []

    def _get_addedfiles(idx):
        # computed lazily: only needed for directory-move detection and for
        # debug output
        if not cache:
            addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
            addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
            u1 = sorted(addedinm1 - addedinm2)
            u2 = sorted(addedinm2 - addedinm1)
            cache.extend((u1, u2))
        return cache[idx]

    u1fn = lambda: _get_addedfiles(0)
    u2fn = lambda: _get_addedfiles(1)
    if repo.ui.debugflag:
        u1 = u1fn()
        u2 = u2fn()

        header = b"  unmatched files in %s"
        if u1:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1))
            )
        if u2:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2))
            )

        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b"   on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b"   src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge
997
997
998
998
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfilesfn: function returning added files on the other side (compared to
    ctx)
    """
    # generate a directory move map
    ruled_out = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        src_dir = pathutil.dirname(src)
        dst_dir = pathutil.dirname(dst)
        if src_dir in ruled_out:
            # already seen to be uninteresting
            continue
        if ctx.hasdir(src_dir) and ctx.hasdir(dst_dir):
            # directory wasn't entirely moved locally
            ruled_out.add(src_dir)
        elif dirmove.get(src_dir, dst_dir) != dst_dir:
            # files from the same directory moved to two different places
            ruled_out.add(src_dir)
        else:
            # looks good so far
            dirmove[src_dir] = dst_dir

    for bad in ruled_out:
        dirmove.pop(bad, None)
    del ruled_out

    if not dirmove:
        return {}, {}

    # normalize keys/values to directory prefixes (with trailing slash)
    dirmove = {
        src_dir + b"/": dst_dir + b"/"
        for src_dir, dst_dir in pycompat.iteritems(dirmove)
    }

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfilesfn():
        if f in fullcopy:
            continue
        for d in dirmove:
            if not f.startswith(d):
                continue
            # new file added in a directory that was moved, move it
            df = dirmove[d] + f[len(d) :]
            if df not in copy:
                movewithdir[f] = df
                repo.ui.debug(
                    b" pending file src: '%s' -> dst: '%s'\n"
                    % (f, df)
                )
            break

    return dirmove, movewithdir
1062
1062
1063
1063
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # work on committed revisions: a working-copy context has rev() == None
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # gather every file touched between base and c2, bailing out on merges
    changedfiles = set()
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # copies made on the c2 side whose source still exists in c1
    copies2 = {
        dst: src
        for dst, src in pycompat.iteritems(_forwardcopies(base, c2))
        if src in m1
    }

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = [
        f for f in changedfiles if f not in m1 and f in base and f in c2
    ]

    copies1 = {}
    if missingfiles:
        # index files added on the c1 side by basename and by dirname
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)
        for f in m1.filesnotin(base.manifest()):
            basenametofilename[os.path.basename(f)].append(f)
            dirnametofilename[os.path.dirname(f)].append(f)

        for f in missingfiles:
            # a candidate shares either the basename (rename within the same
            # directory) or the dirname (move to another directory)
            movecandidates = (
                basenametofilename[os.path.basename(f)]
                + dirnametofilename[os.path.dirname(f)]
            )
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )
            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1171
1171
1172
1172
1173 def _related(f1, f2):
1173 def _related(f1, f2):
1174 """return True if f1 and f2 filectx have a common ancestor
1174 """return True if f1 and f2 filectx have a common ancestor
1175
1175
1176 Walk back to common ancestor to see if the two files originate
1176 Walk back to common ancestor to see if the two files originate
1177 from the same file. Since workingfilectx's rev() is None it messes
1177 from the same file. Since workingfilectx's rev() is None it messes
1178 up the integer comparison logic, hence the pre-step check for
1178 up the integer comparison logic, hence the pre-step check for
1179 None (f1 and f2 can only be workingfilectx's initially).
1179 None (f1 and f2 can only be workingfilectx's initially).
1180 """
1180 """
1181
1181
1182 if f1 == f2:
1182 if f1 == f2:
1183 return True # a match
1183 return True # a match
1184
1184
1185 g1, g2 = f1.ancestors(), f2.ancestors()
1185 g1, g2 = f1.ancestors(), f2.ancestors()
1186 try:
1186 try:
1187 f1r, f2r = f1.linkrev(), f2.linkrev()
1187 f1r, f2r = f1.linkrev(), f2.linkrev()
1188
1188
1189 if f1r is None:
1189 if f1r is None:
1190 f1 = next(g1)
1190 f1 = next(g1)
1191 if f2r is None:
1191 if f2r is None:
1192 f2 = next(g2)
1192 f2 = next(g2)
1193
1193
1194 while True:
1194 while True:
1195 f1r, f2r = f1.linkrev(), f2.linkrev()
1195 f1r, f2r = f1.linkrev(), f2.linkrev()
1196 if f1r > f2r:
1196 if f1r > f2r:
1197 f1 = next(g1)
1197 f1 = next(g1)
1198 elif f2r > f1r:
1198 elif f2r > f1r:
1199 f2 = next(g2)
1199 f2 = next(g2)
1200 else: # f1 and f2 point to files in the same linkrev
1200 else: # f1 and f2 point to files in the same linkrev
1201 return f1 == f2 # true if they point to the same file
1201 return f1 == f2 # true if they point to the same file
1202 except StopIteration:
1202 except StopIteration:
1203 return False
1203 return False
1204
1204
1205
1205
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    new_copies = pathcopies(base, ctx)
    parent = wctx.p1()
    _filter(parent, wctx, new_copies)
    # Extra filtering to drop copy information for files that existed before
    # the graft (otherwise we would create a merge filelog for a non-merge
    # commit).  Iterate over a snapshot since we delete while walking.
    for dest, __ in list(new_copies.items()):
        if dest in parent:
            del new_copies[dest]
    for dst, src in pycompat.iteritems(new_copies):
        wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now