upstream/mercurial-mirror Commit - r46590:cf04af3a

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import collections

10

import collections

11

import os

11

import os

12

13

from .i18n import _

13

from .i18n import _

14

15

16

from . import (

16

from . import (

17

match as matchmod,

17

match as matchmod,

18

node,

18

node,

19

pathutil,

19

pathutil,

20

policy,

20

policy,

21

pycompat,

21

pycompat,

22

util,

22

util,

23

)

23

)

24

25

26

from .utils import stringutil

26

from .utils import stringutil

27

28

from .revlogutils import flagutil

28

from .revlogutils import flagutil

29

30

# Optional accelerated implementation of copy tracing, looked up through the
# `policy` module. _combine_changeset_copies() only delegates to it when this
# name is not None.
rustmod = policy.importrust("copy_tracing")

31

32

33

def _filter(src, dst, t):
    """filters out invalid copies after chaining

    `t` is a {destination: source} mapping and is modified in place; nothing
    is returned. `src` and `dst` are only used for membership tests on file
    names (`name in src`, `name in dst`).
    """

    # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
    # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
    # in the following table (not including trivial cases). For example, case 2
    # is where a file existed in 'src' and remained under that name in 'mid' and
    # then was renamed between 'mid' and 'dst'.
    #
    #   case src mid dst result
    #   1    x   y   -   -
    #   2    x   y   y   x->y
    #   3    x   y   x   -
    #   4    x   y   z   x->z
    #   5    -   x   y   -
    #   6    x   x   y   x->y
    #
    # _chain() takes care of chaining the copies in 'a' and 'b', but it
    # cannot tell the difference between cases 1 and 2, between 3 and 4, or
    # between 5 and 6, so it includes all cases in its result.
    # Cases 1, 3, and 5 are then removed by _filter().

    # iterate over a snapshot since entries are deleted while looping
    for k, v in list(t.items()):
        # remove copies from files that didn't exist
        if v not in src:
            del t[k]
        # remove criss-crossed copies
        elif k in src and v in dst:
            del t[k]
        # remove copies to files that were then removed
        elif k not in dst:
            del t[k]

65

66

67

def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'

    Both arguments are {destination: source} mappings. The result is a new
    mapping that starts as a copy of `prefix`; every entry of `suffix` is then
    added, with its source replaced by the `prefix` source when the `suffix`
    source is itself a destination in `prefix`. Neither input is modified.
    """
    result = prefix.copy()
    for key, value in suffix.items():
        # follow the copy one step further back when `prefix` knows about it
        result[key] = prefix.get(value, value)
    return result

73

74

75

def _tracefile(fctx, am, basemf):
    """return the path of the ancestor of fctx present in ancestor manifest am

    The ancestors of `fctx` are walked in the order `fctx.ancestors()` yields
    them. The path of the first one whose (path, filenode) pair is recorded in
    `am` is returned; when `basemf` is truthy, a match in `basemf` is accepted
    as well. None is returned when no ancestor matches.

    Note: we used to try and stop after a given limit, however checking if that
    limit is reached turned out to be very expensive. we are better off
    disabling that feature."""

    for f in fctx.ancestors():
        path = f.path()
        fnode = f.filenode()
        if am.get(path, None) == fnode:
            return path
        if basemf and basemf.get(path, None) == fnode:
            return path
    return None

89

90

91

def _dirstatecopies(repo, match=None):
    """return a filtered copy of the copies recorded in the dirstate

    An entry is dropped when the dirstate value of its key is not found in
    b'anm', or when `match` is given (truthy) and rejects the key. The mapping
    returned by `repo.dirstate.copies()` itself is left untouched.
    """
    ds = repo.dirstate
    c = ds.copies().copy()
    # iterate over a snapshot of the keys since entries are deleted in the loop
    for k in list(c):
        if ds[k] not in b'anm' or (match and not match(k)):
            del c[k]
    return c

98

99

100

def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.

    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.

    The work is delegated to the `filesnotin()` method of b's manifest, which
    receives a's manifest and `match`.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)

108

109

110

def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms

    Returns True when `repo.filecopiesmode` is b'changeset-sidedata', or when
    the `experimental.copies.read-from` config value is b'changeset-only' or
    b'compatibility'. Returns False otherwise.
    """
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource

117

118

119

def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {destination: source} mapping. When the repository uses the
    changeset-centric algorithm, the work is delegated to
    _changesetforwardcopies() (`base` is not used on that path). Otherwise
    every file of `b` missing from `a` is traced back through its file
    ancestors with _tracefile().
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # shared by every file context traced below
    ancestrycontext = repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            # `start` is only read back under the same `debug` condition
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm

176

177

178

def _revinfo_getter(repo):
    """returns a function that returns the following data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object, or None when the revision does not
      carry the REVIDX_HASCOPIESINFO flag
    """
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    # A small cache to avoid doing the work twice for merges
    #
    # In the vast majority of cases, if we ask information for a revision
    # about 1 parent, we'll later ask it for the other. So it makes sense to
    # keep the information around when reaching the first parent of a merge
    # and to drop it after it was provided for the second parent.
    #
    # There are cases where only one parent of the merge will be walked. It
    # happens when the "destination" of the copy tracing is a descendant of a
    # new root, not common with the "source". In that case, we will only walk
    # through merge parents that are descendants of changesets common
    # between "source" and "destination".
    #
    # With the current implementation, if such changesets have copy
    # information, we'll keep them in memory until the end of
    # _changesetforwardcopies. We don't expect the case to be frequent
    # enough to matter.
    #
    # In addition, it would be possible to reach a pathological case, where
    # many first parents are met before any second parent is reached. In
    # that case the cache could grow. If this ever becomes an issue one can
    # safely introduce a maximum cache size. This would trade extra CPU/IO
    # time to save memory.
    merge_caches = {}

    def revinfo(rev):
        # a cached entry is handed out once, then forgotten
        e = merge_caches.pop(rev, None)
        if e is not None:
            return e
        p1, p2 = parents(rev)
        changes = None
        if flags(rev) & HASCOPIESINFO:
            changes = changelogrevision(rev).changes
        value = (p1, p2, changes)
        if p1 != node.nullrev and p2 != node.nullrev:
            # XXX some case we over cache, IGNORE
            merge_caches[rev] = value
        return value

    return revinfo

234

235

236

def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor

    The returned callable takes `(anc, desc)`. It answers False without
    calling `is_ancestor` when `anc > desc` and True when `anc == desc`. Any
    other pair is forwarded to `is_ancestor` and the answer is remembered for
    later calls with the same pair.
    """
    cache = {}

    def _is_ancestor(anc, desc):
        if anc > desc:
            return False
        elif anc == desc:
            return True
        key = (anc, desc)
        ret = cache.get(key)
        if ret is None:
            ret = cache[key] = is_ancestor(anc, desc)
        return ret

    return _is_ancestor

252

253

254

def _changesetforwardcopies(a, b, match):
    """compute the copies from `a` to `b` using changeset-level copy data

    Returns a {destination: source} mapping. An empty mapping is returned when
    `a` is the null revision, when `a` and `b` are the same revision, or when
    no revision outside the missing set is a parent of a missing revision.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    # {parent rev: [child revs]}, restricted to children in the missing set
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    # parents of missing revisions that are not themselves missing
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            children.setdefault(p, []).append(r)
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    # the target revision is handled as a child, never visited on its own
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)

    if repo.filecopiesmode == b'changeset-sidedata':
        revinfo = _revinfo_getter(repo)
        return _combine_changeset_copies(
            revs, children, b.rev(), revinfo, match, isancestor
        )
    else:
        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )

301

302

303

def _combine_changeset_copies(
    revs, children, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that return (p1, p2, changes); `changes` may be
                  None, otherwise its `copied_from_p1`, `copied_from_p2` and
                  `removed` attributes are read here
    match: a matcher
    isancestor(low_rev, high_rev): ancestry test, used when merging the
                                   copies coming from both parents of a merge

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    if rustmod is not None and alwaysmatch:
        return rustmod.combine_changeset_copies(
            list(revs), children, targetrev, revinfo, isancestor
        )

    isancestor = cached_is_ancestor(isancestor)

    # {rev: {dest: (rev that last touched dest, source or None)}}
    all_copies = {}
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for c in children[r]:
            p1, p2, changes = revinfo(c)
            childcopies = {}
            if r == p1:
                parent = 1
                if changes is not None:
                    childcopies = changes.copied_from_p1
            else:
                assert r == p2
                parent = 2
                if changes is not None:
                    childcopies = changes.copied_from_p2
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            if changes is not None:
                for f in changes.removed:
                    if f in newcopies:
                        if newcopies is copies:
                            # copy on write to avoid affecting potential other
                            # branches. when there are no other branches, this
                            # could be avoided.
                            newcopies = copies.copy()
                        newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            elif newcopies is othercopies:
                # nothing to merge:
                pass
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    # merged in place into `othercopies`, which is already
                    # the value stored for `c`
                    _merge_copies_dict(
                        othercopies, newcopies, isancestor, changes
                    )
                else:
                    _merge_copies_dict(
                        newcopies, othercopies, isancestor, changes
                    )
                    all_copies[c] = newcopies

    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        # entries with a None source record a removal, not a copy
        if source is not None:
            final_copies[dest] = source
    return final_copies

392

395

393

396

394

def _merge_copies_dict(minor, major, isancestor, changes):
    """merge two copies-mapping together, minor and major

    In case of conflict, value from "major" will be picked.

    The merge happens in place: `minor` is updated and nothing is returned.
    Both mappings hold `(tt, source)` 2-tuples as values; `tt` is what gets
    passed to `isancestor` and `source` may be None.

    - `isancestor(low_rev, high_rev)`: callable return True if `low_rev` is an
                                       ancestors of `high_rev`,

    - `changes`: None, or an object whose `salvaged` and `merged` attributes
                 support `dest in ...` tests for the current revision,
    """
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            minor[dest] = value
        else:
            new_tt = value[0]
            other_tt = other[0]
            if value[1] == other[1]:
                continue
            # content from "major" wins, unless it is older
            # than the branch point or there is a merge
            if new_tt == other_tt:
                minor[dest] = value
            elif (
                changes is not None
                and value[1] is None
                and dest in changes.salvaged
            ):
                pass
            elif (
                changes is not None
                and other[1] is None
                and dest in changes.salvaged
            ):
                minor[dest] = value
            elif changes is not None and dest in changes.merged:
                minor[dest] = value
            elif not isancestor(new_tt, other_tt):
                if value[1] is not None:
                    minor[dest] = value
                elif isancestor(other_tt, new_tt):
                    minor[dest] = value

437

440

438

441

439

def _revinfo_getter_extra(repo):
    """return a function that return multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            fparents = fctx._filelog.parents(fctx._filenode)
            # merged means the file revision has two non-null parents
            nb_parents = sum(1 for n in fparents if n != node.nullid)
            return nb_parents >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo

476

479

477

480

478

def _combine_changeset_copies_extra(

481

def _combine_changeset_copies_extra(

479

revs, children, targetrev, revinfo, match, isancestor

482

revs, children, targetrev, revinfo, match, isancestor

480

):

483

):

481

"""version of `_combine_changeset_copies` that works with the Google

484

"""version of `_combine_changeset_copies` that works with the Google

482

specific "extra" based storage for copy information"""

485

specific "extra" based storage for copy information"""

483

all_copies = {}

486

all_copies = {}

484

alwaysmatch = match.always()

487

alwaysmatch = match.always()

485

for r in revs:

488

for r in revs:

486

copies = all_copies.pop(r, None)

489

copies = all_copies.pop(r, None)

487

if copies is None:

490

if copies is None:

488

# this is a root

491

# this is a root

489

copies = {}

492

copies = {}

490

for i, c in enumerate(children[r]):

493

for i, c in enumerate(children[r]):

491

p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)

494

p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)

492

if r == p1:

495

if r == p1:

493

parent = 1

496

parent = 1

494

childcopies = p1copies

497

childcopies = p1copies

495

else:

498

else:

496

assert r == p2

499

assert r == p2

497

parent = 2

500

parent = 2

498

childcopies = p2copies

501

childcopies = p2copies

499

if not alwaysmatch:

502

if not alwaysmatch:

500

childcopies = {

503

childcopies = {

501

dst: src for dst, src in childcopies.items() if match(dst)

504

dst: src for dst, src in childcopies.items() if match(dst)

502

}

505

}

503

newcopies = copies

506

newcopies = copies

504

if childcopies:

507

if childcopies:

505

newcopies = copies.copy()

508

newcopies = copies.copy()

506

for dest, source in pycompat.iteritems(childcopies):

509

for dest, source in pycompat.iteritems(childcopies):

507

prev = copies.get(source)

510

prev = copies.get(source)

508

if prev is not None and prev[1] is not None:

511

if prev is not None and prev[1] is not None:

509

source = prev[1]

512

source = prev[1]

510

newcopies[dest] = (c, source)

513

newcopies[dest] = (c, source)

511

assert newcopies is not copies

514

assert newcopies is not copies

512

for f in removed:

515

for f in removed:

513

if f in newcopies:

516

if f in newcopies:

514

if newcopies is copies:

517

if newcopies is copies:

515

# copy on write to avoid affecting potential other

518

# copy on write to avoid affecting potential other

516

# branches. when there are no other branches, this

519

# branches. when there are no other branches, this

517

# could be avoided.

520

# could be avoided.

518

newcopies = copies.copy()

521

newcopies = copies.copy()

519

newcopies[f] = (c, None)

522

newcopies[f] = (c, None)

520

othercopies = all_copies.get(c)

523

othercopies = all_copies.get(c)

521

if othercopies is None:

524

if othercopies is None:

522

all_copies[c] = newcopies

525

all_copies[c] = newcopies

523

else:

526

else:

524

# we are the second parent to work on c, we need to merge our

527

# we are the second parent to work on c, we need to merge our

525

# work with the other.

528

# work with the other.

526

#

529

#

527

# In case of conflict, parent 1 take precedence over parent 2.

530

# In case of conflict, parent 1 take precedence over parent 2.

528

# This is an arbitrary choice made anew when implementing

531

# This is an arbitrary choice made anew when implementing

529

# changeset based copies. It was made without regards with

532

# changeset based copies. It was made without regards with

530

# potential filelog related behavior.

533

# potential filelog related behavior.

531

if parent == 1:

534

if parent == 1:

532

_merge_copies_dict_extra(

535

_merge_copies_dict_extra(

533

othercopies, newcopies, isancestor, ismerged

536

othercopies, newcopies, isancestor, ismerged

534

)

537

)

535

else:

538

else:

536

_merge_copies_dict_extra(

539

_merge_copies_dict_extra(

537

newcopies, othercopies, isancestor, ismerged

540

newcopies, othercopies, isancestor, ismerged

538

)

541

)

539

all_copies[c] = newcopies

542

all_copies[c] = newcopies

540

543

541

final_copies = {}

544

final_copies = {}

542

for dest, (tt, source) in all_copies[targetrev].items():

545

for dest, (tt, source) in all_copies[targetrev].items():

543

if source is not None:

546

if source is not None:

544

final_copies[dest] = source

547

final_copies[dest] = source

545

return final_copies

548

return final_copies

546

549

547

550

548

def _merge_copies_dict_extra(minor, major, isancestor, ismerged):

551

def _merge_copies_dict_extra(minor, major, isancestor, ismerged):

549

"""version of `_merge_copies_dict` that works with the Google

552

"""version of `_merge_copies_dict` that works with the Google

550

specific "extra" based storage for copy information"""

553

specific "extra" based storage for copy information"""

551

for dest, value in major.items():

554

for dest, value in major.items():

552

other = minor.get(dest)

555

other = minor.get(dest)

553

if other is None:

556

if other is None:

554

minor[dest] = value

557

minor[dest] = value

555

else:

558

else:

556

new_tt = value[0]

559

new_tt = value[0]

557

other_tt = other[0]

560

other_tt = other[0]

558

if value[1] == other[1]:

561

if value[1] == other[1]:

559

continue

562

continue

560

# content from "major" wins, unless it is older

563

# content from "major" wins, unless it is older

561

# than the branch point or there is a merge

564

# than the branch point or there is a merge

562

if (

565

if (

563

new_tt == other_tt

566

new_tt == other_tt

564

or not isancestor(new_tt, other_tt)

567

or not isancestor(new_tt, other_tt)

565

or ismerged(dest)

568

or ismerged(dest)

566

):

569

):

567

minor[dest] = value

570

minor[dest] = value

568

571

569

572

570

def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b

    ``base`` defaults to ``a``.  ``match`` is first passed through the
    repository's ``narrowmatch()``.  When ``b`` has no revision number (the
    working copy), the committed copies up to ``b.p1()`` are chained with the
    copies recorded in the dirstate.
    """
    base = a if base is None else base
    match = a.repo().narrowmatch(match)

    if b.rev() is not None:
        return _committedforwardcopies(a, b, base, match)

    # working copy: combine committed copies with the ones from the dirstate
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))

584

587

585

588

586

def _backwardrenames(a, b, match):
    """reverse the forward copies from b to a, keeping only the renames

    Returns a ``{source: destination}`` dict built from
    ``_forwardcopies(b, a)``; an empty dict when ``experimental.copytrace``
    is ``off``.  Sources rejected by ``match`` or still present in ``a`` are
    left out.
    """
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames (entries are visited in sorted
    # order and a later one overwrites an earlier one).
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    renames = {}
    for dst, src in sorted(pycompat.iteritems(forward)):
        filtered_out = match and not match(src)
        # a source that still exists in `a` was copied, not renamed
        if filtered_out or src in a:
            continue
        renames[src] = dst
    return renames

606

609

607

610

608

def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    The search strategy depends on how ``x`` and ``y`` relate:

    * identical or falsy contexts: no copies,
    * ``y`` is the working copy on top of ``x``: dirstate copies only,
    * ``x`` is the common ancestor: forward copies,
    * ``y`` is the common ancestor: backward renames,
    * otherwise: backward renames to the ancestor chained with forward
      copies from it.

    Except for the first two cases the result goes through `_filter`.
    """
    repo = x._repo
    ui = repo.ui
    debug = ui.debugflag and ui.configbool(b'devel', b'debug.copies')
    if debug:
        ui.debug(b'debug.copies: searching copies from %s to %s\n' % (x, y))

    if x == y or not x or not y:
        return {}

    if y.rev() is None and x == y.p1():
        if debug:
            ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)

    anc = y.ancestor(x)
    if anc == x:
        if debug:
            ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif anc == y:
        if debug:
            ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            ui.debug(b'debug.copies: search mode: combined\n')
        base = x if anc.rev() != node.nullrev else None
        backward = _backwardrenames(x, anc, match=match)
        forward = _forwardcopies(anc, y, base, match=match)
        copies = _chain(backward, forward)

    _filter(x, y, copies)
    return copies

644

647

645

648

646

def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns a 3-tuple ``(branch_copies, branch_copies, diverge)`` where:

    the first two items are instances of branch_copies (as built by
    `_fullcopytracing`, the first one is for the c1 side and the second one
    for the c2 side).

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        dirstate_side = branch_copies(_dirstatecopies(repo, narrowmatch))
        return dirstate_side, branch_copies(), {}

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so only an explicit "false" value disables copytracing; any
        # unparsable value leaves it on
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Do full copytracing if only non-public revisions are involved as
    # that will be fast enough and will also cover the copies which could
    # be missed by heuristics
    if copytracing != b'heuristics' or _isfullcopytraceable(repo, c1, base):
        return _fullcopytracing(repo, c1, c2, base)
    return _heuristicscopytracing(repo, c1, c2, base)

714

717

715

718

716

def _isfullcopytraceable(repo, c1, base):

719

def _isfullcopytraceable(repo, c1, base):

717

"""Checks that if base, source and destination are all no-public branches,

720

"""Checks that if base, source and destination are all no-public branches,

718

if yes let's use the full copytrace algorithm for increased capabilities

721

if yes let's use the full copytrace algorithm for increased capabilities

719

since it will be fast enough.

722

since it will be fast enough.

720

723

721

`experimental.copytrace.sourcecommitlimit` can be used to set a limit for

724

`experimental.copytrace.sourcecommitlimit` can be used to set a limit for

722

number of changesets from c1 to base such that if number of changesets are

725

number of changesets from c1 to base such that if number of changesets are

723

more than the limit, full copytracing algorithm won't be used.

726

more than the limit, full copytracing algorithm won't be used.

724

"""

727

"""

725

if c1.rev() is None:

728

if c1.rev() is None:

726

c1 = c1.p1()

729

c1 = c1.p1()

727

if c1.mutable() and base.mutable():

730

if c1.mutable() and base.mutable():

728

sourcecommitlimit = repo.ui.configint(

731

sourcecommitlimit = repo.ui.configint(

729

b'experimental', b'copytrace.sourcecommitlimit'

732

b'experimental', b'copytrace.sourcecommitlimit'

730

)

733

)

731

commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))

734

commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))

732

return commits < sourcecommitlimit

735

return commits < sourcecommitlimit

733

return False

736

return False

734

737

735

738

736

def _checksinglesidecopies(

739

def _checksinglesidecopies(

737

src, dsts1, m1, m2, mb, c2, base, copy, renamedelete

740

src, dsts1, m1, m2, mb, c2, base, copy, renamedelete

738

):

741

):

739

if src not in m2:

742

if src not in m2:

740

# deleted on side 2

743

# deleted on side 2

741

if src not in m1:

744

if src not in m1:

742

# renamed on side 1, deleted on side 2

745

# renamed on side 1, deleted on side 2

743

renamedelete[src] = dsts1

746

renamedelete[src] = dsts1

744

elif src not in mb:

747

elif src not in mb:

745

# Work around the "short-circuit to avoid issues with merge states"

748

# Work around the "short-circuit to avoid issues with merge states"

746

# thing in pathcopies(): pathcopies(x, y) can return a copy where the

749

# thing in pathcopies(): pathcopies(x, y) can return a copy where the

747

# destination doesn't exist in y.

750

# destination doesn't exist in y.

748

pass

751

pass

749

elif mb[src] != m2[src] and not _related(c2[src], base[src]):

752

elif mb[src] != m2[src] and not _related(c2[src], base[src]):

750

return

753

return

751

elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):

754

elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):

752

# modified on side 2

755

# modified on side 2

753

for dst in dsts1:

756

for dst in dsts1:

754

copy[dst] = src

757

copy[dst] = src

755

758

756

759

757

class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    Every attribute defaults to a fresh empty dict when the matching
    argument is None; a mapping passed in is stored as-is, not copied.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        if copy is None:
            copy = {}
        if renamedelete is None:
            renamedelete = {}
        if dirmove is None:
            dirmove = {}
        if movewithdir is None:
            movewithdir = {}
        self.copy = copy
        self.renamedelete = renamedelete
        self.dirmove = dirmove
        self.movewithdir = movewithdir

    def __repr__(self):
        fields = (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
        return (
            '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>'
            % fields
        )

791

794

792

795

793

def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 3-tuple ``(branch_copies1, branch_copies2, diverge)``: one
    branch_copies instance per side (c1 then c2) and a mapping of source
    name -> sorted list of destination names for sources renamed to
    disjoint destinations on both sides.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # {dst: src} copies from the merge base to each side
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        # no copy on either side: nothing to classify
        return branch_copies(), branch_copies(), {}

    # invert the mappings: {src: [dst, ...]} for each side
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    # files added on one side only, relative to the base
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    if repo.ui.debugflag:
        # flatten the destination lists so each copy can be annotated below
        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b" on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    # the directory moves detected on one side are applied to the files
    # added only on the other side, hence the crossed "movewithdir" results
    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge

916

919

917

920

918

def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
              merge.manifestmerge() won't care about
    addedfiles: added files on the other side (compared to ctx)

    Returns a ``(dirmove, movewithdir)`` pair of dicts:

    - ``dirmove`` maps each renamed source directory to its destination
      directory, both with a trailing ``/``;
    - ``movewithdir`` maps each file of ``addedfiles`` that lives under a
      renamed directory to the path it should be moved to.

    Both dicts are empty when no directory rename is detected.
    """
    # generate a directory move map
    existing_dirs = ctx.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in existing_dirs and ddst in existing_dirs:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    # a directory may have been recorded before it was found to be invalid
    for dsrc in invalid:
        dirmove.pop(dsrc, None)
    del existing_dirs, invalid

    if not dirmove:
        return {}, {}

    # the trailing slash makes the prefix test below match whole path
    # components only ("foo/" must not match "foobar/x")
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for srcdir in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n"
            % (srcdir, dirmove[srcdir])
        )

    # Try the most specific directory first.  If both "dir1/" and
    # "dir1/sub/" were renamed, a file added in "dir1/sub/" matches both
    # prefixes and must follow the rename of "dir1/sub/".  A path sorts
    # after each of its own prefixes, so a reverse sort yields children
    # before their parents.
    children_first = sorted(dirmove, reverse=True)

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfiles:
        if f in fullcopy:
            continue
        for srcdir in children_first:
            if not f.startswith(srcdir):
                continue
            # new file added in a directory that was moved, move it
            df = dirmove[srcdir] + f[len(srcdir) :]
            if df not in copy:
                movewithdir[f] = df
                repo.ui.debug(
                    b" pending file src: '%s' -> dst: '%s'\n" % (f, df)
                )
            break

    return dirmove, movewithdir

981

984

982

985

983

def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If a merge is involved it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns ``(branch_copies(copies1), branch_copies(copies2), {})``, or
    whatever _fullcopytracing() returns when falling back to it.
    """

    # a context without a revision number is replaced by its first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base (excluded) and c2
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # copies recorded between base and c2 whose source is present in c1
    copies2 = {
        dst: src
        for dst, src in pycompat.iteritems(_forwardcopies(base, c2))
        if src in m1
    }

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = [
        f for f in changedfiles if f not in m1 and f in base and f in c2
    ]

    copies1 = {}
    if missingfiles:
        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (the value does not depend on the file, so read it only once)
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        # index the files of c1 that are absent from base, by basename and
        # by directory
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basenametofilename[os.path.basename(f)].append(f)
            dirnametofilename[os.path.dirname(f)].append(f)

        for f in missingfiles:
            # .get() so that a miss does not insert an empty list into the
            # defaultdict indexes
            samebasename = basenametofilename.get(os.path.basename(f), [])
            samedirname = dirnametofilename.get(os.path.dirname(f), [])
            movecandidates = samebasename + samedirname

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}

1090

1093

1091

1094

1092

def _related(f1, f2):

1095

def _related(f1, f2):

1093

"""return True if f1 and f2 filectx have a common ancestor

1096

"""return True if f1 and f2 filectx have a common ancestor

1094

1097

1095

Walk back to common ancestor to see if the two files originate

1098

Walk back to common ancestor to see if the two files originate

1096

from the same file. Since workingfilectx's rev() is None it messes

1099

from the same file. Since workingfilectx's rev() is None it messes

1097

up the integer comparison logic, hence the pre-step check for

1100

up the integer comparison logic, hence the pre-step check for

1098

None (f1 and f2 can only be workingfilectx's initially).

1101

None (f1 and f2 can only be workingfilectx's initially).

1099

"""

1102

"""

1100

1103

1101

if f1 == f2:

1104

if f1 == f2:

1102

return True # a match

1105

return True # a match

1103

1106

1104

g1, g2 = f1.ancestors(), f2.ancestors()

1107

g1, g2 = f1.ancestors(), f2.ancestors()

1105

try:

1108

try:

1106

f1r, f2r = f1.linkrev(), f2.linkrev()

1109

f1r, f2r = f1.linkrev(), f2.linkrev()

1107

1110

1108

if f1r is None:

1111

if f1r is None:

1109

f1 = next(g1)

1112

f1 = next(g1)

1110

if f2r is None:

1113

if f2r is None:

1111

f2 = next(g2)

1114

f2 = next(g2)

1112

1115

1113

while True:

1116

while True:

1114

f1r, f2r = f1.linkrev(), f2.linkrev()

1117

f1r, f2r = f1.linkrev(), f2.linkrev()

1115

if f1r > f2r:

1118

if f1r > f2r:

1116

f1 = next(g1)

1119

f1 = next(g1)

1117

elif f2r > f1r:

1120

elif f2r > f1r:

1118

f2 = next(g2)

1121

f2 = next(g2)

1119

else: # f1 and f2 point to files in the same linkrev

1122

else: # f1 and f2 point to files in the same linkrev

1120

return f1 == f2 # true if they point to the same file

1123

return f1 == f2 # true if they point to the same file

1121

except StopIteration:

1124

except StopIteration:

1122

return False

1125

return False

1123

1126

1124

1127

1125

def graftcopies(wctx, ctx, base):

1128

def graftcopies(wctx, ctx, base):

1126

"""reproduce copies between base and ctx in the wctx

1129

"""reproduce copies between base and ctx in the wctx

1127

1130

1128

Unlike mergecopies(), this function will only consider copies between base

1131

Unlike mergecopies(), this function will only consider copies between base

1129

and ctx; it will ignore copies between base and wctx. Also unlike

1132

and ctx; it will ignore copies between base and wctx. Also unlike

1130

mergecopies(), this function will apply copies to the working copy (instead

1133

mergecopies(), this function will apply copies to the working copy (instead

1131

of just returning information about the copies). That makes it cheaper

1134

of just returning information about the copies). That makes it cheaper

1132

(especially in the common case of base==ctx.p1()) and useful also when

1135

(especially in the common case of base==ctx.p1()) and useful also when

1133

experimental.copytrace=off.

1136

experimental.copytrace=off.

1134

1137

1135

merge.update() will have already marked most copies, but it will only

1138

merge.update() will have already marked most copies, but it will only

1136

mark copies if it thinks the source files are related (see

1139

mark copies if it thinks the source files are related (see

1137

merge._related()). It will also not mark copies if the file wasn't modified

1140

merge._related()). It will also not mark copies if the file wasn't modified

1138

on the local side. This function adds the copies that were "missed"

1141

on the local side. This function adds the copies that were "missed"

1139

by merge.update().

1142

by merge.update().

1140

"""

1143

"""

1141

new_copies = pathcopies(base, ctx)

1144

new_copies = pathcopies(base, ctx)

1142

_filter(wctx.p1(), wctx, new_copies)

1145

_filter(wctx.p1(), wctx, new_copies)

1143

for dst, src in pycompat.iteritems(new_copies):

1146

for dst, src in pycompat.iteritems(new_copies):

1144

wctx[dst].markcopied(src)

1147

wctx[dst].markcopied(src)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import collections
             import os
             from .i18n import _
             from . import (
                 match as matchmod,
                 node,
                 pathutil,
                 policy,
                 pycompat,
                 util,
             )
             from .utils import stringutil
             from .revlogutils import flagutil
             rustmod = policy.importrust("copy_tracing")
             def _filter(src, dst, t):
                 """Drop, in place, the chained copies in ``t`` that are not valid.

                 ``t`` maps destination names to source names.  ``src`` and ``dst``
                 are only used for ``name in ...`` membership tests.  Nothing is
                 returned: ``t`` itself is pruned.
                 """
                 # Chaining copies 'a' (from 'src' to an intermediate commit 'mid') with
                 # copies 'b' (from 'mid' to 'dst') can produce the cases in the table
                 # below (trivial cases are left out).  For example, case 6 is a file
                 # that existed in 'src', kept that name in 'mid', and was then renamed
                 # between 'mid' and 'dst'.
                 #
                 # case src mid dst result
                 #   1   x   y   -    -
                 #   2   x   y   y   x->y
                 #   3   x   y   x    -
                 #   4   x   y   z   x->z
                 #   5   -   x   y    -
                 #   6   x   x   y   x->y
                 #
                 # _chain() cannot tell case 1 from 2, 3 from 4, or 5 from 6, so its
                 # result contains all of them.  Cases 1, 3 and 5 are removed here.
                 #
                 # Iterate over a snapshot because entries are deleted along the way.
                 for dest, source in list(t.items()):
                     if source not in src:
                         # copy from a file that didn't exist
                         invalid = True
                     elif dest in src and source in dst:
                         # criss-crossed copy
                         invalid = True
                     else:
                         # copy to a file that was then removed
                         invalid = dest not in dst
                     if invalid:
                         del t[dest]
             def _chain(prefix, suffix):
                 """Compose two ``{dst: src}`` copy mappings, 'prefix' then 'suffix'.

                 The result starts as a copy of ``prefix``.  Every ``suffix`` entry is
                 then added with its source looked up in ``prefix``; when ``prefix``
                 has no entry for that source, the source is kept unchanged.
                 """
                 chained = prefix.copy()
                 chained.update(
                     (dst, prefix.get(mid, mid))
                     for dst, mid in pycompat.iteritems(suffix)
                 )
                 return chained
             def _tracefile(fctx, am, basemf):
                 """Return the path of the first ancestor of ``fctx`` found in a manifest.

                 The ancestors of ``fctx`` are walked in the order ``fctx.ancestors()``
                 yields them.  The walk stops at the first one whose file node equals
                 the entry stored under its path in ``am`` or, when ``basemf`` is
                 truthy, in ``basemf``.  That ancestor's path is returned.  ``None`` is
                 returned when no ancestor matches.

                 Note: we used to try and stop after a given limit, however checking if
                 that limit is reached turned out to be very expensive. We are better
                 off with that feature disabled.
                 """
                 for ancestor in fctx.ancestors():
                     name = ancestor.path()
                     in_ancestor_manifest = am.get(name, None) == ancestor.filenode()
                     if in_ancestor_manifest:
                         return name
                     if basemf and basemf.get(name, None) == ancestor.filenode():
                         return name
                 return None
             def _dirstatecopies(repo, match=None):
                 """Return the dirstate's copy records, filtered.

                 Works on a copy of ``repo.dirstate.copies()``.  A record is kept only
                 when the dirstate entry of its destination is one of ``b'a'``,
                 ``b'n'`` or ``b'm'`` and, if ``match`` is given, ``match`` accepts the
                 destination.
                 """
                 dirstate = repo.dirstate
                 copies = dirstate.copies().copy()
                 # iterate over a snapshot of the keys: entries are deleted on the fly
                 for dst in list(copies):
                     state_ok = dirstate[dst] in b'anm'
                     if state_ok and (not match or match(dst)):
                         continue
                     del copies[dst]
                 return copies
             def _computeforwardmissing(a, b, match=None):
                 """Return the files that are in b's manifest but not in a's.

                 The answer is whatever ``b.manifest().filesnotin(a.manifest(),
                 match=match)`` returns.

                 This is kept as a separate function so that extensions can easily wrap
                 the call and see which files _forwardcopies is about to process.
                 """
                 source_manifest = a.manifest()
                 return b.manifest().filesnotin(source_manifest, match=match)
             def usechangesetcentricalgo(repo):
                 """Tell whether the changeset-centric copy algorithms should be used.

                 True when ``repo.filecopiesmode`` is ``b'changeset-sidedata'``.
                 Otherwise True only if the ``experimental.copies.read-from`` config
                 value is ``b'changeset-only'`` or ``b'compatibility'``.
                 """
                 if repo.filecopiesmode == b'changeset-sidedata':
                     return True
                 return repo.ui.config(b'experimental', b'copies.read-from') in (
                     b'changeset-only',
                     b'compatibility',
                 )
             def _committedforwardcopies(a, b, base, match):
                 """Like _forwardcopies(), but b.rev() cannot be None (working copy)

                 Returns a ``{dst: src}`` dict.  When usechangesetcentricalgo() says so,
                 the whole computation is delegated to _changesetforwardcopies() (``base``
                 is not used on that path).  Otherwise every file reported by
                 _computeforwardmissing() is traced back with _tracefile() against the
                 manifest of ``a`` and, when ``base`` is not None, the manifest of
                 ``base``.
                 """
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 repo = a._repo
                 if usechangesetcentricalgo(repo):
                     return _changesetforwardcopies(a, b, match)
                 # debug output needs both the ui debug flag and devel.debug.copies
                 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
                 dbg = repo.ui.debug
                 if debug:
                     dbg(b'debug.copies:    looking into rename from %s to %s\n' % (a, b))
                 am = a.manifest()
                 basemf = None if base is None else base.manifest()
                 # find where new files came from
                 # we currently don't try to find where old files went, too expensive
                 # this means we can miss a case like 'hg rm b; hg cp a b'
                 cm = {}
                 # Computing the forward missing is quite expensive on large manifests, since
                 # it compares the entire manifests. We can optimize it in the common use
                 # case of computing what copies are in a commit versus its parent (like
                 # during a rebase or histedit). Note, we exclude merge commits from this
                 # optimization, since the ctx.files() for a merge commit is not correct for
                 # this comparison.
                 forwardmissingmatch = match
                 if b.p1() == a and b.p2().node() == node.nullid:
                     # b is a non-merge child of a: only the files b touched can be new
                     filesmatcher = matchmod.exact(b.files())
                     forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
                 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
                 # computed once and attached to every file context traced below
                 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
                 if debug:
                     dbg(b'debug.copies:      missing files to search: %d\n' % len(missing))
                 for f in sorted(missing):
                     if debug:
                         dbg(b'debug.copies:        tracing file: %s\n' % f)
                     fctx = b[f]
                     fctx._ancestrycontext = ancestrycontext
                     if debug:
                         # 'start' is only defined, and only read, in debug mode
                         start = util.timer()
                     opath = _tracefile(fctx, am, basemf)
                     if opath:
                         if debug:
                             dbg(b'debug.copies:          rename of: %s\n' % opath)
                         cm[f] = opath
                     if debug:
                         dbg(
                             b'debug.copies:          time: %f seconds\n'
                             % (util.timer() - start)
                         )
                 return cm
             def _revinfo_getter(repo):
                 """returns a function that returns the following data given a <rev>

                 * p1: revision number of first parent
                 * p2: revision number of second parent
                 * changes: the ``changes`` attribute of the changelog revision (a
                   ChangingFiles object) when the revision is flagged
                   REVIDX_HASCOPIESINFO, None otherwise
                 """
                 cl = repo.changelog
                 parents = cl.parentrevs
                 flags = cl.flags
                 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
                 changelogrevision = cl.changelogrevision
                 # A small cache to avoid doing the work twice for merges
                 #
                 # In the vast majority of cases, if we ask for information about a
                 # revision for one parent, we'll later ask for it for the other. So it
                 # makes sense to keep the information around when reaching the first
                 # parent of a merge and to drop it after it was provided for the second
                 # parent.
                 #
                 # There are cases where only one parent of the merge will be walked. It
                 # happens when the "destination" of the copy tracing is descendant from
                 # a new root, not common with the "source". In that case, we will only
                 # walk through merge parents that are descendants of changesets common
                 # between "source" and "destination".
                 #
                 # With the current implementation, if such changesets have copy
                 # information, we'll keep them in memory until the end of
                 # _changesetforwardcopies. We don't expect the case to be frequent
                 # enough to matter.
                 #
                 # In addition, it would be possible to reach a pathological case, where
                 # many first parents are met before any second parent is reached. In
                 # that case the cache could grow. If this ever becomes an issue one can
                 # safely introduce a maximum cache size. This would trade extra CPU/IO
                 # time to save memory.
                 merge_caches = {}
                 def revinfo(rev):
                     p1, p2 = parents(rev)
                     # placeholder; always reassigned before being returned
                     value = None
                     # a cached entry is handed out once and dropped from the cache
                     e = merge_caches.pop(rev, None)
                     if e is not None:
                         return e
                     changes = None
                     if flags(rev) & HASCOPIESINFO:
                         changes = changelogrevision(rev).changes
                     value = (p1, p2, changes)
                     if p1 != node.nullrev and p2 != node.nullrev:
                         # merge revision: remember the value for the second request
                         # XXX in some cases we over-cache; this is ignored for now
                         merge_caches[rev] = value
                     return value
                 return revinfo
             def cached_is_ancestor(is_ancestor):
                 """Wrap ``is_ancestor`` in a memoizing function and return the wrapper.

                 The wrapper answers without calling ``is_ancestor`` when ``anc`` is
                 greater than ``desc`` (False) or equal to it (True).  Other answers
                 are remembered per ``(anc, desc)`` pair.  A ``None`` answer is never
                 treated as cached, so it is asked again on the next call.
                 """
                 memo = {}

                 def _is_ancestor(anc, desc):
                     if anc > desc:
                         return False
                     if anc == desc:
                         return True
                     pair = (anc, desc)
                     answer = memo.get(pair)
                     if answer is None:
                         answer = is_ancestor(anc, desc)
                         memo[pair] = answer
                     return answer

                 return _is_ancestor
             def _changesetforwardcopies(a, b, match):
                 """Compute forward copies from ``a`` to ``b`` from changeset-level data.

                 Returns ``{}`` when ``a`` is the null revision or the same revision as
                 ``b``, or when no root revision is found.  Otherwise it builds the list
                 of revisions to walk and a parent -> children mapping, then hands them
                 to _combine_changeset_copies() (sidedata storage) or
                 _combine_changeset_copies_extra() (any other ``filecopiesmode``).
                 """
                 if a.rev() in (node.nullrev, b.rev()):
                     return {}
                 repo = a.repo().unfiltered()
                 children = {}
                 cl = repo.changelog
                 isancestor = cl.isancestorrev
                 # NOTE(review): presumably the ancestors of b that are not ancestors of
                 # a -- confirm against changelog.findmissingrevs
                 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
                 mrset = set(missingrevs)
                 roots = set()
                 for r in missingrevs:
                     for p in cl.parentrevs(r):
                         if p == node.nullrev:
                             continue
                         # record r as a child of p
                         if p not in children:
                             children[p] = [r]
                         else:
                             children[p].append(r)
                         # a parent outside of the missing set is a starting point
                         if p not in mrset:
                             roots.add(p)
                 if not roots:
                     # no common revision to track copies from
                     return {}
                 min_root = min(roots)
                 from_head = set(
                     cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
                 )
                 # walk the missing revisions found by reachableroots() plus the roots,
                 # but not b itself: b is only ever processed as a child
                 iterrevs = set(from_head)
                 iterrevs &= mrset
                 iterrevs.update(roots)
                 iterrevs.remove(b.rev())
                 revs = sorted(iterrevs)
                 if repo.filecopiesmode == b'changeset-sidedata':
                     revinfo = _revinfo_getter(repo)
                     return _combine_changeset_copies(
                         revs, children, b.rev(), revinfo, match, isancestor
                     )
                 else:
                     revinfo = _revinfo_getter_extra(repo)
                     return _combine_changeset_copies_extra(
                         revs, children, b.rev(), revinfo, match, isancestor
                     )
             def _combine_changeset_copies(
                 revs, children, targetrev, revinfo, match, isancestor
             ):
                 """combine the copies information for each item of revs

                 revs: sorted iterable of revision to visit
                 children: a {parent: [children]} mapping.
                 targetrev: the final copies destination revision (not in revs)
                 revinfo(rev): a function that returns (p1, p2, changes), where
                               `changes` is None or an object exposing
                               `copied_from_p1`, `copied_from_p2` and `removed`
                 match: a matcher
                 isancestor(low_rev, high_rev): ancestry test, used when the copies
                                                coming from both parents of a merge
                                                have to be reconciled

                 It returns the aggregated copies information for `targetrev`, as a
                 {dest: source} dict.  Internally every entry is a (rev, source) pair
                 where `source` is None once the file has been removed; those entries
                 are dropped from the returned dict.
                 """
                 alwaysmatch = match.always()
                 if rustmod is not None and alwaysmatch:
                     # the Rust implementation is only used when nothing is filtered
                     return rustmod.combine_changeset_copies(
                         list(revs), children, targetrev, revinfo, isancestor
                     )
                 isancestor = cached_is_ancestor(isancestor)
                 all_copies = {}
                 for r in revs:
                     copies = all_copies.pop(r, None)
                     if copies is None:
                         # this is a root
                         copies = {}
                     for c in children[r]:
                         p1, p2, changes = revinfo(c)
                         childcopies = {}
                         if r == p1:
                             parent = 1
                             if changes is not None:
                                 childcopies = changes.copied_from_p1
                         else:
                             assert r == p2
                             parent = 2
                             if changes is not None:
                                 childcopies = changes.copied_from_p2
                         if not alwaysmatch:
                             childcopies = {
                                 dst: src for dst, src in childcopies.items() if match(dst)
                             }
                         newcopies = copies
                         if childcopies:
                             newcopies = copies.copy()
                             for dest, source in pycompat.iteritems(childcopies):
                                 # chain with a copy already known for the source
                                 prev = copies.get(source)
                                 if prev is not None and prev[1] is not None:
                                     source = prev[1]
                                 newcopies[dest] = (c, source)
                             assert newcopies is not copies
                         if changes is not None:
                             for f in changes.removed:
                                 if f in newcopies:
                                     if newcopies is copies:
                                         # copy on write to avoid affecting potential other
                                         # branches.  when there are no other branches, this
                                         # could be avoided.
                                         newcopies = copies.copy()
                                     newcopies[f] = (c, None)
                         othercopies = all_copies.get(c)
                         if othercopies is None:
                             all_copies[c] = newcopies
                         elif newcopies is othercopies:
                             # nothing to merge:
                             pass
                         else:
                             # we are the second parent to work on c, we need to merge our
                             # work with the other.
                             #
                             # In case of conflict, parent 1 take precedence over parent 2.
                             # This is an arbitrary choice made anew when implementing
                             # changeset based copies. It was made without regards with
                             # potential filelog related behavior.
                             if parent == 1:
                                 _merge_copies_dict(
                                     othercopies, newcopies, isancestor, changes
                                 )
                             else:
                                 _merge_copies_dict(
                                     newcopies, othercopies, isancestor, changes
                                 )
                                 all_copies[c] = newcopies
                 final_copies = {}
                 for dest, (tt, source) in all_copies[targetrev].items():
                     if source is not None:
                         final_copies[dest] = source
                 return final_copies
             def _merge_copies_dict(minor, major, isancestor, changes):
                 """Merge the copy mapping ``major`` into ``minor``, in place.

                 Both mappings associate a destination with a ``(rev, source)`` pair.
                 Entries only present in ``major`` are copied over.  When both sides
                 know a destination with the same source, ``minor`` is left alone.
                 Otherwise the value from ``major`` replaces the one from ``minor``
                 unless one of the rules spelled out in the body says otherwise.

                 - `isancestor(low_rev, high_rev)`: callable returning True if
                   `low_rev` is an ancestor of `high_rev`,
                 - `changes`: None, or an object whose `salvaged` and `merged`
                   attributes support ``dest in ...`` tests.
                 """
                 for dest, value in major.items():
                     other = minor.get(dest)
                     if other is None:
                         minor[dest] = value
                         continue
                     if value[1] == other[1]:
                         # both sides agree on the source
                         continue
                     new_tt = value[0]
                     other_tt = other[0]
                     # content from "major" wins, unless it is older
                     # than the branch point or there is a merge
                     if new_tt == other_tt:
                         minor[dest] = value
                         continue
                     if changes is not None:
                         if value[1] is None and dest in changes.salvaged:
                             # "major" says removed but the file was salvaged
                             continue
                         if other[1] is None and dest in changes.salvaged:
                             minor[dest] = value
                             continue
                         if dest in changes.merged:
                             minor[dest] = value
                             continue
                     if isancestor(new_tt, other_tt):
                         # "major" information is older: keep "minor"
                         continue
                     if value[1] is not None or isancestor(other_tt, new_tt):
                         minor[dest] = value
             def _revinfo_getter_extra(repo):
                 """Return a function that returns multiple data given a <rev>.

                 The returned callable yields a 6-tuple:

                 * p1: revision number of first parent
                 * p2: revision number of second parent
                 * p1copies: mapping of copies from p1
                 * p2copies: mapping of copies from p2
                 * removed: the files removed, as given by ``ctx.filesremoved()``
                 * ismerged: a callback to know if a file was merged in that revision
                 """
                 parentrevs = repo.changelog.parentrevs

                 def get_ismerged(rev):
                     ctx = repo[rev]

                     def ismerged(path):
                         if path not in ctx.files():
                             return False
                         fctx = ctx[path]
                         file_parents = fctx._filelog.parents(fctx._filenode)
                         # a file counts as merged when it has two non-null parents
                         real_parents = [n for n in file_parents if n != node.nullid]
                         return len(real_parents) >= 2

                     return ismerged

                 def revinfo(rev):
                     p1, p2 = parentrevs(rev)
                     ctx = repo[rev]
                     p1copies, p2copies = ctx._copies
                     removed = ctx.filesremoved()
                     return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

                 return revinfo
             def _combine_changeset_copies_extra(
                 revs, children, targetrev, revinfo, match, isancestor
             ):
                 """version of `_combine_changeset_copies` that works with the Google
                 specific "extra" based storage for copy information

                 revs: iterable of revisions to visit, in order
                 children: a {parent: [children]} mapping
                 targetrev: the revision whose aggregated copies are returned
                 revinfo(rev): a function that returns
                               (p1, p2, p1copies, p2copies, removed, ismerged)
                 match: a matcher; copies whose destination it rejects are ignored
                 isancestor, ismerged: forwarded to `_merge_copies_dict_extra`

                 It returns a {dest: source} dict for `targetrev`.
                 """
                 # rev -> {dest: (rev that recorded the entry, source or None)}
                 all_copies = {}
                 alwaysmatch = match.always()
                 for r in revs:
                     copies = all_copies.pop(r, None)
                     if copies is None:
                         # this is a root
                         copies = {}
                     for i, c in enumerate(children[r]):
                         p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
                         if r == p1:
                             parent = 1
                             childcopies = p1copies
                         else:
                             assert r == p2
                             parent = 2
                             childcopies = p2copies
                         if not alwaysmatch:
                             childcopies = {
                                 dst: src for dst, src in childcopies.items() if match(dst)
                             }
                         newcopies = copies
                         if childcopies:
                             newcopies = copies.copy()
                             for dest, source in pycompat.iteritems(childcopies):
                                 # chain with a copy already known for the source
                                 prev = copies.get(source)
                                 if prev is not None and prev[1] is not None:
                                     source = prev[1]
                                 newcopies[dest] = (c, source)
                             assert newcopies is not copies
                         for f in removed:
                             if f in newcopies:
                                 if newcopies is copies:
                                     # copy on write to avoid affecting potential other
                                     # branches.  when there are no other branches, this
                                     # could be avoided.
                                     newcopies = copies.copy()
                                 # a None source marks the destination as removed
                                 newcopies[f] = (c, None)
                         othercopies = all_copies.get(c)
                         if othercopies is None:
                             all_copies[c] = newcopies
                         else:
                             # we are the second parent to work on c, we need to merge our
                             # work with the other.
                             #
                             # In case of conflict, parent 1 take precedence over parent 2.
                             # This is an arbitrary choice made anew when implementing
                             # changeset based copies. It was made without regards with
                             # potential filelog related behavior.
                             if parent == 1:
                                 _merge_copies_dict_extra(
                                     othercopies, newcopies, isancestor, ismerged
                                 )
                             else:
                                 _merge_copies_dict_extra(
                                     newcopies, othercopies, isancestor, ismerged
                                 )
                                 all_copies[c] = newcopies
                 # only keep the entries that still have a source
                 final_copies = {}
                 for dest, (tt, source) in all_copies[targetrev].items():
                     if source is not None:
                         final_copies[dest] = source
                 return final_copies
             def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
                 """Variant of `_merge_copies_dict` for the Google specific "extra"
                 based storage of copy information.

                 ``major`` is merged into ``minor`` in place.  Entries are
                 ``(rev, source)`` pairs keyed by destination.  ``isancestor(low_rev,
                 high_rev)`` and ``ismerged(path)`` are callables used to settle
                 conflicts.
                 """
                 for dest, value in major.items():
                     other = minor.get(dest)
                     if other is None:
                         minor[dest] = value
                         continue
                     if value[1] == other[1]:
                         # both sides agree on the source
                         continue
                     new_tt = value[0]
                     other_tt = other[0]
                     # content from "major" wins, unless it is older
                     # than the branch point or there is a merge
                     major_wins = (
                         new_tt == other_tt
                         or not isancestor(new_tt, other_tt)
                         or ismerged(dest)
                     )
                     if major_wins:
                         minor[dest] = value
             def _forwardcopies(a, b, base=None, match=None):
                 """Find the {dst@b: src@a} copy mapping where a is an ancestor of b.

                 ``base`` defaults to ``a``.  ``match`` is first passed through the
                 repository's ``narrowmatch()``.  When ``b`` is the working copy
                 (``b.rev()`` is None), the copies up to ``b.p1()`` are chained with
                 the copies recorded in the dirstate.
                 """
                 base = a if base is None else base
                 match = a.repo().narrowmatch(match)
                 if b.rev() is not None:
                     return _committedforwardcopies(a, b, base, match)
                 # working copy: combine committed copies with those from the dirstate
                 committed = _committedforwardcopies(a, b.p1(), base, match)
                 return _chain(committed, _dirstatecopies(b._repo, match))
             def _backwardrenames(a, b, match):
                 """Return a {src: dst} mapping of renames, going backward from a to b.

                 It is built by reversing ``_forwardcopies(b, a)``.  Returns ``{}``
                 straight away when ``experimental.copytrace`` is ``b'off'``.
                 """
                 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
                     return {}
                 # Even though we're not taking copies into account, 1:n rename
                 # situations can still exist (e.g. hg cp a b; hg mv a c). In those
                 # cases we arbitrarily pick one of the renames.
                 # We don't want to pass in "match" here, since that would filter
                 # the destination by it. Since we're reversing the copies, we want
                 # to filter the source instead.
                 forward = _forwardcopies(b, a)
                 renames = {}
                 for dst, src in sorted(pycompat.iteritems(forward)):
                     rejected = match and not match(src)
                     # a source still present in 'a' is a copy, not a rename: skip it
                     if rejected or src in a:
                         continue
                     renames[src] = dst
                 return renames
             def pathcopies(x, y, match=None):
                 """Find the {dst@y: src@x} copy mapping for a directed compare.

                 Returns ``{}`` when ``x`` equals ``y`` or either of them is falsy.
                 When ``y`` is the working copy and ``x`` is its first parent, the
                 dirstate copies are returned directly.  Otherwise the copies are
                 searched forward, backward, or both ways through the ancestor given
                 by ``y.ancestor(x)``, and the result is cleaned up with _filter().
                 """
                 repo = x._repo
                 ui = repo.ui
                 debug = ui.debugflag and ui.configbool(b'devel', b'debug.copies')

                 def note(message):
                     # only emit the search mode when copy debugging is enabled
                     if debug:
                         ui.debug(message)

                 if debug:
                     ui.debug(
                         b'debug.copies: searching copies from %s to %s\n' % (x, y)
                     )
                 if x == y or not x or not y:
                     return {}
                 if y.rev() is None and x == y.p1():
                     note(b'debug.copies: search mode: dirstate\n')
                     # short-circuit to avoid issues with merge states
                     return _dirstatecopies(repo, match)
                 a = y.ancestor(x)
                 if a == x:
                     note(b'debug.copies: search mode: forward\n')
                     copies = _forwardcopies(x, y, match=match)
                 elif a == y:
                     note(b'debug.copies: search mode: backward\n')
                     copies = _backwardrenames(x, y, match=match)
                 else:
                     note(b'debug.copies: search mode: combined\n')
                     base = x if a.rev() != node.nullrev else None
                     backward = _backwardrenames(x, a, match=match)
                     forward = _forwardcopies(a, y, base, match=match)
                     copies = _chain(backward, forward)
                 _filter(x, y, copies)
                 return copies
             def mergecopies(repo, c1, c2, base):
                 """Find the moves and copies between ``c1`` and ``c2`` relevant to a merge.

                 ``base`` is used as the merge base.

                 Copytracing is used by commands such as rebase, merge and unshelve to
                 merge files that were moved or copied in one merge parent and modified
                 in the other. For example:

                 o          ---> 4 another commit
                 |
                 |   o      ---> 3 commit that modifies a.txt
                 |  /
                 o /        ---> 2 commit that moves a.txt to b.txt
                 |/
                 o          ---> 1 merge base

                 If we try to rebase revision 3 on revision 4, there is no a.txt in
                 revision 4, so with copytrace disabled the user gets the message:

                 ```other changed <file> which local deleted```

                 Returns a 3-tuple ``(branch_copies1, branch_copies2, diverge)``:

                 - the first two items are instances of branch_copies;
                 - "diverge" is a mapping of source name -> list of destination names
                   for divergent renames.

                 The copytracing algorithm is selected from the repository configuration
                 (``experimental.copytrace`` and usechangesetcentricalgo()).
                 """
                 # avoid silly behavior for update from empty dir
                 if not (c1 and c2) or c1 == c2:
                     return branch_copies(), branch_copies(), {}

                 narrowmatch = c1.repo().narrowmatch()

                 # avoid silly behavior for parent -> working dir
                 #
                 # This shortcut deliberately comes before the copytrace switch below:
                 # it is required for a simple copy to be kept across a rebase even
                 # when copy tracing is turned off.
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     dirstateside = branch_copies(_dirstatecopies(repo, narrowmatch))
                     return dirstateside, branch_copies(), {}

                 copytracing = repo.ui.config(b'experimental', b'copytrace')
                 # stringutil.parsebool() returns None for values it cannot parse, so
                 # only an explicit "false" value disables copy tracing.
                 if stringutil.parsebool(copytracing) is False:
                     return branch_copies(), branch_copies(), {}

                 # The heuristics are only used when all of the following hold:
                 # - changeset-centric algorithms are not required (the heuristics
                 #   don't make sense for them),
                 # - the user asked for them, and
                 # - full copytracing is not considered cheap enough for this branch
                 #   (it also covers copies the heuristics could miss).
                 useheuristics = (
                     not usechangesetcentricalgo(repo)
                     and copytracing == b'heuristics'
                     and not _isfullcopytraceable(repo, c1, base)
                 )
                 if useheuristics:
                     return _heuristicscopytracing(repo, c1, c2, base)
                 return _fullcopytracing(repo, c1, c2, base)
             def _isfullcopytraceable(repo, c1, base):
                 """Tell whether full copytracing is cheap enough for this source branch.

                 Full copytracing is only chosen when both ``base`` and ``c1`` (or the
                 first parent of ``c1`` when ``c1.rev()`` is None) are mutable, i.e. on
                 non-public branches, and the branch is short enough.

                 `experimental.copytrace.sourcecommitlimit` bounds the number of
                 changesets matched by ``base::c1``: the function returns True only when
                 that count is strictly below the limit.
                 """
                 source = c1.p1() if c1.rev() is None else c1
                 if not (source.mutable() and base.mutable()):
                     return False

                 limit = repo.ui.configint(
                     b'experimental', b'copytrace.sourcecommitlimit'
                 )
                 branchlen = len(repo.revs(b'%d::%d', base.rev(), source.rev()))
                 return branchlen < limit
             def _checksinglesidecopies(
                 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
             ):
                 """Classify a source that was copied/renamed on one side only.

                 src: the copy source
                 dsts1: destinations ``src`` was copied to on the copying side
                 m1, m2, mb: manifests of the copying side, the other side and the base
                 c2, base: contexts of the other side and of the merge base
                 copy: updated in place with ``dst -> src`` for copies to merge
                 renamedelete: updated in place with ``src -> dsts1`` when the file was
                               renamed on the copying side and deleted on the other

                 Nothing is returned; results are recorded in ``copy``/``renamedelete``.
                 """
                 if src not in m2:
                     # deleted on side 2
                     if src not in m1:
                         # renamed on side 1, deleted on side 2
                         renamedelete[src] = dsts1
                     return

                 if src not in mb:
                     # Work around the "short-circuit to avoid issues with merge states"
                     # thing in pathcopies(): pathcopies(x, y) can return a copy where the
                     # destination doesn't exist in y.
                     return

                 contentchanged = mb[src] != m2[src]
                 if contentchanged and not _related(c2[src], base[src]):
                     # side 2 replaced the file with unrelated content: nothing to merge
                     return

                 if contentchanged or mb.flags(src) != m2.flags(src):
                     # modified on side 2
                     copy.update((dst, src) for dst in dsts1)
             class branch_copies(object):
                 """Copy information gathered for one side of a merge/graft.

                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.

                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.

                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.

                 "dirmove" is a mapping of detected source dir -> destination dir renames.
                 This is needed for handling changes to new files previously grafted into
                 renamed directories.

                 Every argument left as None becomes a fresh empty dict; any mapping that
                 is passed in is stored as-is (not copied).
                 """

                 def __init__(
                     self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
                 ):
                     # Test against None (not truthiness) so that an empty mapping
                     # supplied by the caller keeps its identity.
                     if copy is None:
                         copy = {}
                     if renamedelete is None:
                         renamedelete = {}
                     if dirmove is None:
                         dirmove = {}
                     if movewithdir is None:
                         movewithdir = {}
                     self.copy = copy
                     self.renamedelete = renamedelete
                     self.dirmove = dirmove
                     self.movewithdir = movewithdir

                 def __repr__(self):
                     template = (
                         '<branch_copies\n'
                         '  copy=%r\n'
                         '  renamedelete=%r\n'
                         '  dirmove=%r\n'
                         '  movewithdir=%r\n'
                         '>'
                     )
                     fields = (
                         self.copy,
                         self.renamedelete,
                         self.dirmove,
                         self.movewithdir,
                     )
                     return template % fields
             def _fullcopytracing(repo, c1, c2, base):
                 """Trace every copy made on either side of a merge since ``base``.

                 Copies from ``base`` to ``c1`` and from ``base`` to ``c2`` are computed
                 with pathcopies() and then classified per source file: copies that have
                 to be merged, renames that diverge between the two sides, and renames
                 on one side of a file that the other side deleted. Directory renames
                 are detected last with _dir_renames().

                 This is pretty slow when a lot of changesets are involved but will track
                 all the copies.

                 Returns ``(branch_copies1, branch_copies2, diverge)``: two branch_copies
                 instances for the ``c1`` and ``c2`` sides, and a mapping of source name
                 -> sorted list of destination names for divergent renames.
                 """
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 mb = base.manifest()

                 copies1 = pathcopies(base, c1)
                 copies2 = pathcopies(base, c2)
                 if not copies1 and not copies2:
                     return branch_copies(), branch_copies(), {}

                 def invert(copies):
                     # turn {dst: src} into {src: [dst, ...]}
                     bysource = {}
                     for dst, src in copies.items():
                         bysource.setdefault(src, []).append(dst)
                     return bysource

                 inversecopies1 = invert(copies1)
                 inversecopies2 = invert(copies2)

                 copy1, copy2 = {}, {}
                 renamedelete1, renamedelete2 = {}, {}
                 diverge = {}

                 for src in set(inversecopies1) | set(inversecopies2):
                     dsts1 = inversecopies1.get(src)
                     dsts2 = inversecopies2.get(src)

                     if not dsts1:
                         if dsts2:
                             # copied/renamed only on side 2
                             _checksinglesidecopies(
                                 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
                             )
                         continue
                     if not dsts2:
                         # copied/renamed only on side 1
                         _checksinglesidecopies(
                             src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
                         )
                         continue

                     # copied/renamed on both sides
                     gone1 = src not in m1
                     gone2 = src not in m2
                     if gone1 != gone2:
                         # TODO: Handle cases where it was renamed on one side and copied
                         # on the other side
                         continue

                     common = set(dsts1) & set(dsts2)
                     if gone1 and not common:
                         # Renamed on both sides without any shared destination. If
                         # there's some overlap in the rename destinations, we consider
                         # it not divergent. For example, if side 1 copies 'a' to 'b'
                         # and 'c' and deletes 'a', and side 2 copies 'a' to 'c' and 'd'
                         # and deletes 'a'.
                         diverge[src] = sorted(set(dsts1) | set(dsts2))
                         continue

                     # copied on both sides, or renamed on both sides to (at least
                     # partly) the same destinations: the shared ones get merged
                     for dst in common:
                         copy1[dst] = src
                         copy2[dst] = src

                 # find interesting file sets from manifests
                 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
                 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
                 u1 = sorted(addedinm1 - addedinm2)
                 u2 = sorted(addedinm2 - addedinm1)

                 header = b"  unmatched files in %s"
                 for side, unmatched in ((b'local', u1), (b'other', u2)):
                     if unmatched:
                         repo.ui.debug(
                             b"%s:\n   %s\n" % (header % side, b"\n   ".join(unmatched))
                         )

                 if repo.ui.debugflag:
                     divergeset = {dst for dsts in diverge.values() for dst in dsts}
                     renamedeleteset = {
                         dst for dsts in renamedelete1.values() for dst in dsts
                     }
                     renamedeleteset.update(
                         dst for dsts in renamedelete2.values() for dst in dsts
                     )

                     repo.ui.debug(
                         b"  all copies found (* = to merge, ! = divergent, "
                         b"% = renamed and deleted):\n"
                     )
                     for side, copies in ((b"local", copies1), (b"remote", copies2)):
                         if not copies:
                             continue
                         repo.ui.debug(b"   on %s side:\n" % side)
                         for f in sorted(copies):
                             marks = (
                                 (b"*", f in copy1 or f in copy2),
                                 (b"!", f in divergeset),
                                 (b"%", f in renamedeleteset),
                             )
                             note = b"".join(mark for mark, hit in marks if hit)
                             repo.ui.debug(
                                 b"    src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                             )
                     del renamedeleteset, divergeset

                 repo.ui.debug(b"  checking for directory renames\n")

                 # a directory moved on one side drags along files added on the other
                 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
                 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)

                 side1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
                 side2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
                 return side1, side2, diverge
             def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
                 """Find moved directories and the files that should move with them.

                 ctx: the context for one of the sides
                 copy: files copied on the same side (as ctx)
                 fullcopy: files copied on the same side (as ctx), including those that
                           merge.manifestmerge() won't care about
                 addedfiles: added files on the other side (compared to ctx)

                 Returns ``(dirmove, movewithdir)``. ``dirmove`` maps a source directory
                 to its destination directory, both with a trailing ``/``.
                 ``movewithdir`` maps a file from ``addedfiles`` to the name it gets
                 inside the moved directory. Both are empty dicts when no directory move
                 was detected.
                 """
                 # generate a directory move map
                 existing = ctx.dirs()
                 rejected = set()
                 moves = {}

                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in pycompat.iteritems(fullcopy):
                     srcdir = pathutil.dirname(src)
                     dstdir = pathutil.dirname(dst)
                     if srcdir in rejected:
                         # already seen to be uninteresting
                         continue
                     # either the directory wasn't entirely moved locally, or files from
                     # the same directory moved to two different places
                     partial = srcdir in existing and dstdir in existing
                     if partial or moves.get(srcdir, dstdir) != dstdir:
                         rejected.add(srcdir)
                     else:
                         # looks good so far
                         moves[srcdir] = dstdir

                 # a directory may have been recorded before it was found to be invalid
                 for srcdir in rejected:
                     moves.pop(srcdir, None)
                 del existing, rejected

                 if not moves:
                     return {}, {}

                 dirmove = {
                     srcdir + b"/": dstdir + b"/"
                     for srcdir, dstdir in pycompat.iteritems(moves)
                 }
                 for srcdir, dstdir in pycompat.iteritems(dirmove):
                     repo.ui.debug(
                         b"   discovered dir src: '%s' -> dst: '%s'\n" % (srcdir, dstdir)
                     )

                 movewithdir = {}
                 # check unaccounted nonoverlapping files against directory moves
                 for f in addedfiles:
                     if f in fullcopy:
                         continue
                     # only the first moved directory that prefixes the file is used
                     prefix = next((d for d in dirmove if f.startswith(d)), None)
                     if prefix is None:
                         continue
                     # new file added in a directory that was moved, move it
                     df = dirmove[prefix] + f[len(prefix) :]
                     if df not in copy:
                         movewithdir[f] = df
                         repo.ui.debug(
                             b"   pending file src: '%s' -> dst: '%s'\n" % (f, df)
                         )

                 return dirmove, movewithdir
             def _heuristicscopytracing(repo, c1, c2, base):
                 """Fast copytracing using filename heuristics

                 Assumes that moves or renames are of following two types:

                 1) Inside a directory only (same directory name but different filenames)
                 2) Move from one directory to another
                                 (same filenames but different directory names)

                 Works only when there are no merge commits in the "source branch".
                 Source branch is commits from base up to c2 not including base.

                 If merge is involved it fallbacks to _fullcopytracing().

                 Can be used by setting the following config:

                     [experimental]
                     copytrace = heuristics

                 In some cases the copy/move candidates found by heuristics can be very large
                 in number and that will make the algorithm slow. The number of possible
                 candidates to check can be limited by using the config
                 `experimental.copytrace.movecandidateslimit` which defaults to 100.

                 repo: the repository, used for revset queries, config and ui output
                 c1, c2: the two contexts being merged; a context whose ``rev()`` is
                         None is replaced by its first parent
                 base: the merge base context

                 Returns ``(branch_copies(copies1), branch_copies(copies2), {})``, the
                 same 3-tuple shape as _fullcopytracing(); the last item is always an
                 empty dict here.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c2.rev() is None:
                     c2 = c2.p1()

                 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
                     # If base is not in c2 branch, we switch to fullcopytracing
                     repo.ui.debug(
                         b"switching to full copytracing as base is not "
                         b"an ancestor of c2\n"
                     )
                     return _fullcopytracing(repo, c1, c2, base)

                 # collect every file touched on the source branch (base, c2]
                 changedfiles = set()
                 ctx = c2
                 while ctx != base:
                     if len(ctx.parents()) == 2:
                         # To keep things simple let's not handle merges
                         repo.ui.debug(b"switching to full copytracing because of merges\n")
                         return _fullcopytracing(repo, c1, c2, base)
                     changedfiles.update(ctx.files())
                     ctx = ctx.p1()

                 m1 = c1.manifest()

                 # copies made on the source branch whose source still exists in c1
                 copies2 = {
                     dst: src
                     for dst, src in pycompat.iteritems(_forwardcopies(base, c2))
                     if src in m1
                 }

                 # file is missing if it isn't present in the destination, but is present in
                 # the base and present in the source.
                 # Presence in the base is important to exclude added files, presence in the
                 # source is important to exclude removed files.
                 missingfiles = [
                     f for f in changedfiles if f not in m1 and f in base and f in c2
                 ]

                 copies1 = {}
                 if missingfiles:
                     # we can have a lot of candidates which can slow down the heuristics
                     # config value to limit the number of candidates moves to check
                     # (loop invariant, so it is read once for all missing files)
                     maxcandidates = repo.ui.configint(
                         b'experimental', b'copytrace.movecandidateslimit'
                     )

                     # index the files added in c1 by basename and by directory name
                     basenametofilename = collections.defaultdict(list)
                     dirnametofilename = collections.defaultdict(list)
                     for f in m1.filesnotin(base.manifest()):
                         basenametofilename[os.path.basename(f)].append(f)
                         dirnametofilename[os.path.dirname(f)].append(f)

                     for f in missingfiles:
                         samebasename = basenametofilename[os.path.basename(f)]
                         samedirname = dirnametofilename[os.path.dirname(f)]
                         movecandidates = samebasename + samedirname

                         if len(movecandidates) > maxcandidates:
                             repo.ui.status(
                                 _(
                                     b"skipping copytracing for '%s', more "
                                     b"candidates than the limit: %d\n"
                                 )
                                 % (f, len(movecandidates))
                             )
                             continue

                         # f is guaranteed to be present in c2, that's why
                         # c2.filectx(f) won't fail
                         f2 = c2.filectx(f)
                         for candidate in movecandidates:
                             f1 = c1.filectx(candidate)
                             if _related(f1, f2):
                                 # if there are a few related copies then we'll merge
                                 # changes into all of them. This matches the behaviour
                                 # of upstream copytracing
                                 copies1[candidate] = f

                 return branch_copies(copies1), branch_copies(copies2), {}
             def _related(f1, f2):
                 """Return True if the filectxs ``f1`` and ``f2`` share a common ancestor.

                 Both ancestries are walked back, always advancing the side with the
                 higher linkrev, until the two sides sit on the same linkrev; the files
                 are related if they are equal at that point. Running out of ancestors
                 on either side means they are not related.

                 A workingfilectx has a linkrev() of None, which would break the integer
                 comparison, hence the initial step onto the first ancestor (f1 and f2
                 can only be workingfilectx's initially).
                 """
                 if f1 == f2:
                     return True  # a match

                 ancestors1 = f1.ancestors()
                 ancestors2 = f2.ancestors()
                 try:
                     rev1, rev2 = f1.linkrev(), f2.linkrev()
                     if rev1 is None:
                         f1 = next(ancestors1)
                     if rev2 is None:
                         f2 = next(ancestors2)

                     rev1, rev2 = f1.linkrev(), f2.linkrev()
                     while rev1 != rev2:
                         if rev1 > rev2:
                             f1 = next(ancestors1)
                             rev1 = f1.linkrev()
                         else:
                             f2 = next(ancestors2)
                             rev2 = f2.linkrev()
                     # f1 and f2 point to files in the same linkrev:
                     # true if they point to the same file
                     return f1 == f2
                 except StopIteration:
                     # one ancestry was exhausted before the linkrevs met
                     return False
             def graftcopies(wctx, ctx, base):
                 """Reproduce the copies between ``base`` and ``ctx`` in ``wctx``.

                 Unlike mergecopies(), this function will only consider copies between base
                 and ctx; it will ignore copies between base and wctx. Also unlike
                 mergecopies(), this function will apply copies to the working copy (instead
                 of just returning information about the copies). That makes it cheaper
                 (especially in the common case of base==ctx.p1()) and useful also when
                 experimental.copytrace=off.

                 merge.update() will have already marked most copies, but it will only
                 mark copies if it thinks the source files are related (see
                 merge._related()). It will also not mark copies if the file wasn't modified
                 on the local side. This function adds the copies that were "missed"
                 by merge.update().

                 Nothing is returned; each remaining copy is recorded with
                 ``wctx[dst].markcopied(src)``.
                 """
                 grafted = pathcopies(base, ctx)

                 # discard the copies that are not valid between the working copy parent
                 # and the working copy itself (filters the mapping in place)
                 parent = wctx.p1()
                 _filter(parent, wctx, grafted)

                 for destination, source in pycompat.iteritems(grafted):
                     wctx[destination].markcopied(source)