upstream/mercurial-mirror Commit - r46265:7990e7d9

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import collections

10

import collections

11

import os

11

import os

12

13

from .i18n import _

13

from .i18n import _

14

15

16

from . import (

16

from . import (

17

match as matchmod,

17

match as matchmod,

18

node,

18

node,

19

pathutil,

19

pathutil,

20

pycompat,

20

pycompat,

21

util,

21

util,

22

)

22

)

23

24

25

from .utils import stringutil

25

from .utils import stringutil

26

27

from .revlogutils import flagutil

27

from .revlogutils import flagutil

28

29

30

def _filter(src, dst, t):

30

def _filter(src, dst, t):

31

"""filters out invalid copies after chaining"""

31

"""filters out invalid copies after chaining"""

32

33

# When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')

33

# When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')

34

# with copies in 'b' (from 'mid' to 'dst'), we can get the different cases

34

# with copies in 'b' (from 'mid' to 'dst'), we can get the different cases

35

# in the following table (not including trivial cases). For example, case 2

35

# in the following table (not including trivial cases). For example, case 2

36

# is where a file existed in 'src' and remained under that name in 'mid' and

36

# is where a file existed in 'src' and remained under that name in 'mid' and

37

# then was renamed between 'mid' and 'dst'.

37

# then was renamed between 'mid' and 'dst'.

38

#

38

#

39

# case src mid dst result

39

# case src mid dst result

40

# 1 x y - -

40

# 1 x y - -

41

# 2 x y y x->y

41

# 2 x y y x->y

42

# 3 x y x -

42

# 3 x y x -

43

# 4 x y z x->z

43

# 4 x y z x->z

44

# 5 - x y -

44

# 5 - x y -

45

# 6 x x y x->y

45

# 6 x x y x->y

46

#

46

#

47

# _chain() takes care of chaining the copies in 'a' and 'b', but it

47

# _chain() takes care of chaining the copies in 'a' and 'b', but it

48

# cannot tell the difference between cases 1 and 2, between 3 and 4, or

48

# cannot tell the difference between cases 1 and 2, between 3 and 4, or

49

# between 5 and 6, so it includes all cases in its result.

49

# between 5 and 6, so it includes all cases in its result.

50

# Cases 1, 3, and 5 are then removed by _filter().

50

# Cases 1, 3, and 5 are then removed by _filter().

51

52

for k, v in list(t.items()):

52

for k, v in list(t.items()):

53

# remove copies from files that didn't exist

53

# remove copies from files that didn't exist

54

if v not in src:

54

if v not in src:

55

del t[k]

55

del t[k]

56

# remove criss-crossed copies

56

# remove criss-crossed copies

57

elif k in src and v in dst:

57

elif k in src and v in dst:

58

del t[k]

58

del t[k]

59

# remove copies to files that were then removed

59

# remove copies to files that were then removed

60

elif k not in dst:

60

elif k not in dst:

61

del t[k]

61

del t[k]

62

63

64

def _chain(prefix, suffix):

64

def _chain(prefix, suffix):

65

"""chain two sets of copies 'prefix' and 'suffix'"""

65

"""chain two sets of copies 'prefix' and 'suffix'"""

66

result = prefix.copy()

66

result = prefix.copy()

67

for key, value in pycompat.iteritems(suffix):

67

for key, value in pycompat.iteritems(suffix):

68

result[key] = prefix.get(value, value)

68

result[key] = prefix.get(value, value)

69

return result

69

return result

70

71

72

def _tracefile(fctx, am, basemf):

72

def _tracefile(fctx, am, basemf):

73

"""return file context that is the ancestor of fctx present in ancestor

73

"""return file context that is the ancestor of fctx present in ancestor

74

manifest am

74

manifest am

75

76

Note: we used to try and stop after a given limit, however checking if that

76

Note: we used to try and stop after a given limit, however checking if that

77

limit is reached turned out to be very expensive. we are better off

77

limit is reached turned out to be very expensive. we are better off

78

disabling that feature."""

78

disabling that feature."""

79

80

for f in fctx.ancestors():

80

for f in fctx.ancestors():

81

path = f.path()

81

path = f.path()

82

if am.get(path, None) == f.filenode():

82

if am.get(path, None) == f.filenode():

83

return path

83

return path

84

if basemf and basemf.get(path, None) == f.filenode():

84

if basemf and basemf.get(path, None) == f.filenode():

85

return path

85

return path

86

87

88

def _dirstatecopies(repo, match=None):

88

def _dirstatecopies(repo, match=None):

89

ds = repo.dirstate

89

ds = repo.dirstate

90

c = ds.copies().copy()

90

c = ds.copies().copy()

91

for k in list(c):

91

for k in list(c):

92

if ds[k] not in b'anm' or (match and not match(k)):

92

if ds[k] not in b'anm' or (match and not match(k)):

93

del c[k]

93

del c[k]

94

return c

94

return c

95

96

97

def _computeforwardmissing(a, b, match=None):

97

def _computeforwardmissing(a, b, match=None):

98

"""Computes which files are in b but not a.

98

"""Computes which files are in b but not a.

99

This is its own function so extensions can easily wrap this call to see what

99

This is its own function so extensions can easily wrap this call to see what

100

files _forwardcopies is about to process.

100

files _forwardcopies is about to process.

101

"""

101

"""

102

ma = a.manifest()

102

ma = a.manifest()

103

mb = b.manifest()

103

mb = b.manifest()

104

return mb.filesnotin(ma, match=match)

104

return mb.filesnotin(ma, match=match)

105

106

107

def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms

    True when ``repo.filecopiesmode`` is ``changeset-sidedata``, or when the
    ``experimental.copies.read-from`` config value is ``changeset-only`` or
    ``compatibility``.
    """
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource

114

115

116

def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a ``{destination: source}`` mapping. When the changeset-centric
    algorithm is enabled the work is delegated to _changesetforwardcopies();
    otherwise every file present in ``b`` but missing from ``a`` is traced
    back through its file ancestors with _tracefile().
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    # NOTE(review): this block was recovered from a whitespace-collapsed
    # dump; the number of spaces after "debug.copies:" in the messages below
    # should be confirmed against the upstream file.
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # shared by every file context traced below
    ancestrycontext = repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm

173

174

175

def _revinfo_getter(repo):
    """returns a function that returns the following data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object, or None when the revision is not
      flagged as carrying copy information
    """
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    # A small cache to avoid doing the work twice for merges
    #
    # In the vast majority of cases, if we ask information for a revision
    # about 1 parent, we'll later ask it for the other. So it makes sense to
    # keep the information around when reaching the first parent of a merge
    # and to drop it after it was provided for the second parent.
    #
    # There are cases where only one parent of the merge will be walked. It
    # happens when the "destination" of the copy tracing is a descendant of a
    # new root, not common with the "source". In that case, we will only walk
    # through merge parents that are descendants of changesets common
    # between "source" and "destination".
    #
    # With the current implementation, if such changesets have copy
    # information, we'll keep them in memory until the end of
    # _changesetforwardcopies. We don't expect the case to be frequent
    # enough to matter.
    #
    # In addition, it would be possible to reach a pathological case, where
    # many first parents are met before any second parent is reached. In
    # that case the cache could grow. If this ever becomes an issue one can
    # safely introduce a maximum cache size. This would trade extra CPU/IO
    # time to save memory.
    merge_caches = {}

    def revinfo(rev):
        # second request for a merge: serve it from the cache and forget it
        cached = merge_caches.pop(rev, None)
        if cached is not None:
            return cached
        p1, p2 = parents(rev)
        changes = None
        if flags(rev) & HASCOPIESINFO:
            changes = changelogrevision(rev).changes
        value = (p1, p2, changes)
        if p1 != node.nullrev and p2 != node.nullrev:
            # XXX some case we over cache, IGNORE
            merge_caches[rev] = value
        return value

    return revinfo

231

232

233

def _changesetforwardcopies(a, b, match):
    """compute copies from a to b using changeset-stored copy information

    Returns a ``{destination: source}`` mapping. An empty mapping is returned
    when ``a`` is the null revision, when ``a`` and ``b`` are the same
    revision, or when no revision outside of the missing set is a parent of
    a missing revision (nothing to track copies from).
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev  # XXX we should add caching to this.
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    # build the {parent: [children]} map restricted to the missing revisions
    # and collect the parents that sit just outside of that set
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            children.setdefault(p, []).append(r)
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    # b is the target of the walk, it is never used as a parent
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)

    if repo.filecopiesmode == b'changeset-sidedata':
        revinfo = _revinfo_getter(repo)
        return _combine_changeset_copies(
            revs, children, b.rev(), revinfo, match, isancestor
        )
    else:
        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )

280

281

282

def _combine_changeset_copies(

282

def _combine_changeset_copies(

283

revs, children, targetrev, revinfo, match, isancestor

283

revs, children, targetrev, revinfo, match, isancestor

284

):

284

):

285

"""combine the copies information for each item of iterrevs

285

"""combine the copies information for each item of iterrevs

286

287

revs: sorted iterable of revision to visit

287

revs: sorted iterable of revision to visit

288

children: a {parent: [children]} mapping.

288

children: a {parent: [children]} mapping.

289

targetrev: the final copies destination revision (not in iterrevs)

289

targetrev: the final copies destination revision (not in iterrevs)

290

revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)

290

revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)

291

match: a matcher

291

match: a matcher

292

293

It returns the aggregated copies information for `targetrev`.

293

It returns the aggregated copies information for `targetrev`.

294

"""

294

"""

295

all_copies = {}

295

all_copies = {}

296

alwaysmatch = match.always()

296

alwaysmatch = match.always()

297

for r in revs:

297

for r in revs:

298

copies = all_copies.pop(r, None)

298

copies = all_copies.pop(r, None)

299

if copies is None:

299

if copies is None:

300

# this is a root

300

# this is a root

301

copies = {}

301

copies = {}

302

for i, c in enumerate(children[r]):

302

for i, c in enumerate(children[r]):

303

p1, p2, changes = revinfo(c)

303

p1, p2, changes = revinfo(c)

304

childcopies = {}

304

childcopies = {}

305

if r == p1:

305

if r == p1:

306

parent = 1

306

parent = 1

307

if changes is not None:

307

if changes is not None:

308

childcopies = changes.copied_from_p1

308

childcopies = changes.copied_from_p1

309

else:

309

else:

310

assert r == p2

310

assert r == p2

311

parent = 2

311

parent = 2

312

if changes is not None:

312

if changes is not None:

313

childcopies = changes.copied_from_p2

313

childcopies = changes.copied_from_p2

314

if not alwaysmatch:

314

if not alwaysmatch:

315

childcopies = {

315

childcopies = {

316

dst: src for dst, src in childcopies.items() if match(dst)

316

dst: src for dst, src in childcopies.items() if match(dst)

317

}

317

}

318

newcopies = copies

318

newcopies = copies

319

if childcopies:

319

if childcopies:

320

newcopies = copies.copy()

320

newcopies = copies.copy()

321

for dest, source in pycompat.iteritems(childcopies):

321

for dest, source in pycompat.iteritems(childcopies):

322

prev = copies.get(source)

322

prev = copies.get(source)

323

if prev is not None and prev[1] is not None:

323

if prev is not None and prev[1] is not None:

324

source = prev[1]

324

source = prev[1]

325

newcopies[dest] = (c, source)

325

newcopies[dest] = (c, source)

326

assert newcopies is not copies

326

assert newcopies is not copies

327

if changes is not None:

327

if changes is not None:

328

for f in changes.removed:

328

for f in changes.removed:

329

if f in newcopies:

329

if f in newcopies:

330

if newcopies is copies:

330

if newcopies is copies:

331

# copy on write to avoid affecting potential other

331

# copy on write to avoid affecting potential other

332

# branches. when there are no other branches, this

332

# branches. when there are no other branches, this

333

# could be avoided.

333

# could be avoided.

334

newcopies = copies.copy()

334

newcopies = copies.copy()

335

newcopies[f] = (c, None)

335

newcopies[f] = (c, None)

336

othercopies = all_copies.get(c)

336

othercopies = all_copies.get(c)

337

if othercopies is None:

337

if othercopies is None:

338

all_copies[c] = newcopies

338

all_copies[c] = newcopies

339

else:

339

else:

340

# we are the second parent to work on c, we need to merge our

340

# we are the second parent to work on c, we need to merge our

341

# work with the other.

341

# work with the other.

342

#

342

#

343

# In case of conflict, parent 1 take precedence over parent 2.

343

# In case of conflict, parent 1 take precedence over parent 2.

344

# This is an arbitrary choice made anew when implementing

344

# This is an arbitrary choice made anew when implementing

345

# changeset based copies. It was made without regards with

345

# changeset based copies. It was made without regards with

346

# potential filelog related behavior.

346

# potential filelog related behavior.

347

if parent == 1:

347

if parent == 1:

348

_merge_copies_dict(

348

_merge_copies_dict(

349

othercopies, newcopies, isancestor, changes

349

othercopies, newcopies, isancestor, changes

350

)

350

)

351

else:

351

else:

352

_merge_copies_dict(

352

_merge_copies_dict(

353

newcopies, othercopies, isancestor, changes

353

newcopies, othercopies, isancestor, changes

354

)

354

)

355

all_copies[c] = newcopies

355

all_copies[c] = newcopies

356

357

final_copies = {}

357

final_copies = {}

358

for dest, (tt, source) in all_copies[targetrev].items():

358

for dest, (tt, source) in all_copies[targetrev].items():

359

if source is not None:

359

if source is not None:

360

final_copies[dest] = source

360

final_copies[dest] = source

361

return final_copies

361

return final_copies

362

363

364

def _merge_copies_dict(minor, major, isancestor, changes):

364

def _merge_copies_dict(minor, major, isancestor, changes):

365

"""merge two copies-mapping together, minor and major

365

"""merge two copies-mapping together, minor and major

366

367

In case of conflict, value from "major" will be picked.

367

In case of conflict, value from "major" will be picked.

368

369

- `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an

369

- `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an

370

ancestors of `high_rev`,

370

ancestors of `high_rev`,

371

372

- `ismerged(path)`: callable return True if `path` have been merged in the

372

- `ismerged(path)`: callable return True if `path` have been merged in the

373

current revision,

373

current revision,

374

"""

374

"""

375

for dest, value in major.items():

375

for dest, value in major.items():

376

other = minor.get(dest)

376

other = minor.get(dest)

377

if other is None:

377

if other is None:

378

minor[dest] = value

378

minor[dest] = value

379

else:

379

else:

380

new_tt = value[0]

380

new_tt = value[0]

381

other_tt = other[0]

381

other_tt = other[0]

382

if value[1] == other[1]:

382

if value[1] == other[1]:

383

continue

383

continue

384

# content from "major" wins, unless it is older

384

# content from "major" wins, unless it is older

385

# than the branch point or there is a merge

385

# than the branch point or there is a merge

386

if new_tt == other_tt:

386

if new_tt == other_tt:

387

minor[dest] = value

387

minor[dest] = value

388

elif (

388

elif (

389

changes is not None

389

changes is not None

390

and value[1] is None

390

and value[1] is None

391

and dest in changes.salvaged

391

and dest in changes.salvaged

392

):

392

):

393

pass

393

pass

394

elif (

394

elif (

395

changes is not None

395

changes is not None

396

and other[1] is None

396

and other[1] is None

397

and dest in changes.salvaged

397

and dest in changes.salvaged

398

):

398

):

399

minor[dest] = value

399

minor[dest] = value

400

elif not isancestor(new_tt, other_tt):

400

elif changes is not None and dest in changes.merged:

401

minor[dest] = value

401

minor[dest] = value

402

elif changes is not None and dest in changes.merged:

402

elif not isancestor(new_tt, other_tt):

403

minor[dest] = value

403

minor[dest] = value

404

405

406

def _revinfo_getter_extra(repo):

406

def _revinfo_getter_extra(repo):

407

"""return a function that return multiple data given a <rev>"i

407

"""return a function that return multiple data given a <rev>"i

408

409

* p1: revision number of first parent

409

* p1: revision number of first parent

410

* p2: revision number of first parent

410

* p2: revision number of first parent

411

* p1copies: mapping of copies from p1

411

* p1copies: mapping of copies from p1

412

* p2copies: mapping of copies from p2

412

* p2copies: mapping of copies from p2

413

* removed: a list of removed files

413

* removed: a list of removed files

414

* ismerged: a callback to know if file was merged in that revision

414

* ismerged: a callback to know if file was merged in that revision

415

"""

415

"""

416

cl = repo.changelog

416

cl = repo.changelog

417

parents = cl.parentrevs

417

parents = cl.parentrevs

418

419

def get_ismerged(rev):

419

def get_ismerged(rev):

420

ctx = repo[rev]

420

ctx = repo[rev]

421

422

def ismerged(path):

422

def ismerged(path):

423

if path not in ctx.files():

423

if path not in ctx.files():

424

return False

424

return False

425

fctx = ctx[path]

425

fctx = ctx[path]

426

parents = fctx._filelog.parents(fctx._filenode)

426

parents = fctx._filelog.parents(fctx._filenode)

427

nb_parents = 0

427

nb_parents = 0

428

for n in parents:

428

for n in parents:

429

if n != node.nullid:

429

if n != node.nullid:

430

nb_parents += 1

430

nb_parents += 1

431

return nb_parents >= 2

431

return nb_parents >= 2

432

433

return ismerged

433

return ismerged

434

435

def revinfo(rev):

435

def revinfo(rev):

436

p1, p2 = parents(rev)

436

p1, p2 = parents(rev)

437

ctx = repo[rev]

437

ctx = repo[rev]

438

p1copies, p2copies = ctx._copies

438

p1copies, p2copies = ctx._copies

439

removed = ctx.filesremoved()

439

removed = ctx.filesremoved()

440

return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

440

return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

441

442

return revinfo

442

return revinfo

443

444

445

def _combine_changeset_copies_extra(

445

def _combine_changeset_copies_extra(

446

revs, children, targetrev, revinfo, match, isancestor

446

revs, children, targetrev, revinfo, match, isancestor

447

):

447

):

448

"""version of `_combine_changeset_copies` that works with the Google

448

"""version of `_combine_changeset_copies` that works with the Google

449

specific "extra" based storage for copy information"""

449

specific "extra" based storage for copy information"""

450

all_copies = {}

450

all_copies = {}

451

alwaysmatch = match.always()

451

alwaysmatch = match.always()

452

for r in revs:

452

for r in revs:

453

copies = all_copies.pop(r, None)

453

copies = all_copies.pop(r, None)

454

if copies is None:

454

if copies is None:

455

# this is a root

455

# this is a root

456

copies = {}

456

copies = {}

457

for i, c in enumerate(children[r]):

457

for i, c in enumerate(children[r]):

458

p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)

458

p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)

459

if r == p1:

459

if r == p1:

460

parent = 1

460

parent = 1

461

childcopies = p1copies

461

childcopies = p1copies

462

else:

462

else:

463

assert r == p2

463

assert r == p2

464

parent = 2

464

parent = 2

465

childcopies = p2copies

465

childcopies = p2copies

466

if not alwaysmatch:

466

if not alwaysmatch:

467

childcopies = {

467

childcopies = {

468

dst: src for dst, src in childcopies.items() if match(dst)

468

dst: src for dst, src in childcopies.items() if match(dst)

469

}

469

}

470

newcopies = copies

470

newcopies = copies

471

if childcopies:

471

if childcopies:

472

newcopies = copies.copy()

472

newcopies = copies.copy()

473

for dest, source in pycompat.iteritems(childcopies):

473

for dest, source in pycompat.iteritems(childcopies):

474

prev = copies.get(source)

474

prev = copies.get(source)

475

if prev is not None and prev[1] is not None:

475

if prev is not None and prev[1] is not None:

476

source = prev[1]

476

source = prev[1]

477

newcopies[dest] = (c, source)

477

newcopies[dest] = (c, source)

478

assert newcopies is not copies

478

assert newcopies is not copies

479

for f in removed:

479

for f in removed:

480

if f in newcopies:

480

if f in newcopies:

481

if newcopies is copies:

481

if newcopies is copies:

482

# copy on write to avoid affecting potential other

482

# copy on write to avoid affecting potential other

483

# branches. when there are no other branches, this

483

# branches. when there are no other branches, this

484

# could be avoided.

484

# could be avoided.

485

newcopies = copies.copy()

485

newcopies = copies.copy()

486

newcopies[f] = (c, None)

486

newcopies[f] = (c, None)

487

othercopies = all_copies.get(c)

487

othercopies = all_copies.get(c)

488

if othercopies is None:

488

if othercopies is None:

489

all_copies[c] = newcopies

489

all_copies[c] = newcopies

490

else:

490

else:

491

# we are the second parent to work on c, we need to merge our

491

# we are the second parent to work on c, we need to merge our

492

# work with the other.

492

# work with the other.

493

#

493

#

494

# In case of conflict, parent 1 take precedence over parent 2.

494

# In case of conflict, parent 1 take precedence over parent 2.

495

# This is an arbitrary choice made anew when implementing

495

# This is an arbitrary choice made anew when implementing

496

# changeset based copies. It was made without regards with

496

# changeset based copies. It was made without regards with

497

# potential filelog related behavior.

497

# potential filelog related behavior.

498

if parent == 1:

498

if parent == 1:

499

_merge_copies_dict_extra(

499

_merge_copies_dict_extra(

500

othercopies, newcopies, isancestor, ismerged

500

othercopies, newcopies, isancestor, ismerged

501

)

501

)

502

else:

502

else:

503

_merge_copies_dict_extra(

503

_merge_copies_dict_extra(

504

newcopies, othercopies, isancestor, ismerged

504

newcopies, othercopies, isancestor, ismerged

505

)

505

)

506

all_copies[c] = newcopies

506

all_copies[c] = newcopies

507

508

final_copies = {}

508

final_copies = {}

509

for dest, (tt, source) in all_copies[targetrev].items():

509

for dest, (tt, source) in all_copies[targetrev].items():

510

if source is not None:

510

if source is not None:

511

final_copies[dest] = source

511

final_copies[dest] = source

512

return final_copies

512

return final_copies

513

514

515

def _merge_copies_dict_extra(minor, major, isancestor, ismerged):

515

def _merge_copies_dict_extra(minor, major, isancestor, ismerged):

516

"""version of `_merge_copies_dict` that works with the Google

516

"""version of `_merge_copies_dict` that works with the Google

517

specific "extra" based storage for copy information"""

517

specific "extra" based storage for copy information"""

518

for dest, value in major.items():

518

for dest, value in major.items():

519

other = minor.get(dest)

519

other = minor.get(dest)

520

if other is None:

520

if other is None:

521

minor[dest] = value

521

minor[dest] = value

522

else:

522

else:

523

new_tt = value[0]

523

new_tt = value[0]

524

other_tt = other[0]

524

other_tt = other[0]

525

if value[1] == other[1]:

525

if value[1] == other[1]:

526

continue

526

continue

527

# content from "major" wins, unless it is older

527

# content from "major" wins, unless it is older

528

# than the branch point or there is a merge

528

# than the branch point or there is a merge

529

if (

529

if (

530

new_tt == other_tt

530

new_tt == other_tt

531

or not isancestor(new_tt, other_tt)

531

or not isancestor(new_tt, other_tt)

532

or ismerged(dest)

532

or ismerged(dest)

533

):

533

):

534

minor[dest] = value

534

minor[dest] = value

535

536

537

def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b

    `base` falls back to `a` when it is not given.  `match` is narrowed
    through the repository's narrowmatch before it is used.
    """
    base = a if base is None else base
    narrowed = a.repo().narrowmatch(match)

    if b.rev() is not None:
        # b is a committed revision
        return _committedforwardcopies(a, b, base, narrowed)

    # b is the working copy: trace up to its first parent, then combine
    # the result with the copies recorded in the dirstate
    committed = _committedforwardcopies(a, b.p1(), base, narrowed)
    return _chain(committed, _dirstatecopies(b._repo, narrowed))

551

552

553

def _backwardrenames(a, b, match):

553

def _backwardrenames(a, b, match):

554

if a._repo.ui.config(b'experimental', b'copytrace') == b'off':

554

if a._repo.ui.config(b'experimental', b'copytrace') == b'off':

555

return {}

555

return {}

556

557

# Even though we're not taking copies into account, 1:n rename situations

557

# Even though we're not taking copies into account, 1:n rename situations

558

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

558

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

559

# arbitrarily pick one of the renames.

559

# arbitrarily pick one of the renames.

560

# We don't want to pass in "match" here, since that would filter

560

# We don't want to pass in "match" here, since that would filter

561

# the destination by it. Since we're reversing the copies, we want

561

# the destination by it. Since we're reversing the copies, we want

562

# to filter the source instead.

562

# to filter the source instead.

563

f = _forwardcopies(b, a)

563

f = _forwardcopies(b, a)

564

r = {}

564

r = {}

565

for k, v in sorted(pycompat.iteritems(f)):

565

for k, v in sorted(pycompat.iteritems(f)):

566

if match and not match(v):

566

if match and not match(v):

567

continue

567

continue

568

# remove copies

568

# remove copies

569

if v in a:

569

if v in a:

570

continue

570

continue

571

r[v] = k

571

r[v] = k

572

return r

572

return r

573

574

575

def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    Depending on how `x` and `y` relate, the mapping comes from the
    dirstate, from a forward search, from a backward search, or from a
    backward search to their ancestor chained with a forward search.
    """
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

    def report(message):
        # emit a debug line only when copy debugging is enabled
        if debug:
            repo.ui.debug(message)

    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )

    if x == y or not x or not y:
        return {}

    if y.rev() is None and x == y.p1():
        report(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)

    ancestor = y.ancestor(x)
    if ancestor == x:
        report(b'debug.copies: search mode: forward\n')
        found = _forwardcopies(x, y, match=match)
    elif ancestor == y:
        report(b'debug.copies: search mode: backward\n')
        found = _backwardrenames(x, y, match=match)
    else:
        report(b'debug.copies: search mode: combined\n')
        base = x if ancestor.rev() != node.nullrev else None
        found = _chain(
            _backwardrenames(x, ancestor, match=match),
            _forwardcopies(ancestor, y, base, match=match),
        )
    _filter(x, y, found)
    return found

611

612

613

def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the
    following message:

    ```other changed <file> which local deleted```

    Returns a 3-tuple made of two `branch_copies` instances followed by
    "diverge", a mapping of source name -> list of destination names for
    divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        dirstate_side = branch_copies(_dirstatecopies(repo, narrowmatch))
        return dirstate_side, branch_copies(), {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so copytracing stays on in such cases
        return branch_copies(), branch_copies(), {}

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across
    # a rebase.
    #
    # The heuristics don't make sense when we need changeset-centric algos.
    # Otherwise, when heuristics are requested, still do full copytracing if
    # only non-public revisions are involved as that will be fast enough and
    # will also cover the copies which could be missed by heuristics.
    use_heuristics = (
        not usechangesetcentricalgo(repo)
        and copytracing == b'heuristics'
        and not _isfullcopytraceable(repo, c1, base)
    )
    tracer = _heuristicscopytracing if use_heuristics else _fullcopytracing
    return tracer(repo, c1, c2, base)

681

682

683

def _isfullcopytraceable(repo, c1, base):

683

def _isfullcopytraceable(repo, c1, base):

684

""" Checks that if base, source and destination are all no-public branches,

684

""" Checks that if base, source and destination are all no-public branches,

685

if yes let's use the full copytrace algorithm for increased capabilities

685

if yes let's use the full copytrace algorithm for increased capabilities

686

since it will be fast enough.

686

since it will be fast enough.

687

688

`experimental.copytrace.sourcecommitlimit` can be used to set a limit for

688

`experimental.copytrace.sourcecommitlimit` can be used to set a limit for

689

number of changesets from c1 to base such that if number of changesets are

689

number of changesets from c1 to base such that if number of changesets are

690

more than the limit, full copytracing algorithm won't be used.

690

more than the limit, full copytracing algorithm won't be used.

691

"""

691

"""

692

if c1.rev() is None:

692

if c1.rev() is None:

693

c1 = c1.p1()

693

c1 = c1.p1()

694

if c1.mutable() and base.mutable():

694

if c1.mutable() and base.mutable():

695

sourcecommitlimit = repo.ui.configint(

695

sourcecommitlimit = repo.ui.configint(

696

b'experimental', b'copytrace.sourcecommitlimit'

696

b'experimental', b'copytrace.sourcecommitlimit'

697

)

697

)

698

commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))

698

commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))

699

return commits < sourcecommitlimit

699

return commits < sourcecommitlimit

700

return False

700

return False

701

702

703

def _checksinglesidecopies(

703

def _checksinglesidecopies(

704

src, dsts1, m1, m2, mb, c2, base, copy, renamedelete

704

src, dsts1, m1, m2, mb, c2, base, copy, renamedelete

705

):

705

):

706

if src not in m2:

706

if src not in m2:

707

# deleted on side 2

707

# deleted on side 2

708

if src not in m1:

708

if src not in m1:

709

# renamed on side 1, deleted on side 2

709

# renamed on side 1, deleted on side 2

710

renamedelete[src] = dsts1

710

renamedelete[src] = dsts1

711

elif src not in mb:

711

elif src not in mb:

712

# Work around the "short-circuit to avoid issues with merge states"

712

# Work around the "short-circuit to avoid issues with merge states"

713

# thing in pathcopies(): pathcopies(x, y) can return a copy where the

713

# thing in pathcopies(): pathcopies(x, y) can return a copy where the

714

# destination doesn't exist in y.

714

# destination doesn't exist in y.

715

pass

715

pass

716

elif mb[src] != m2[src] and not _related(c2[src], base[src]):

716

elif mb[src] != m2[src] and not _related(c2[src], base[src]):

717

return

717

return

718

elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):

718

elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):

719

# modified on side 2

719

# modified on side 2

720

for dst in dsts1:

720

for dst in dsts1:

721

copy[dst] = src

721

copy[dst] = src

722

723

724

class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # every omitted mapping gets its own fresh dictionary; a mapping that
        # is passed in is stored as is, without copying
        if copy is None:
            copy = {}
        if renamedelete is None:
            renamedelete = {}
        if dirmove is None:
            dirmove = {}
        if movewithdir is None:
            movewithdir = {}
        self.copy = copy
        self.renamedelete = renamedelete
        self.dirmove = dirmove
        self.movewithdir = movewithdir

    def __repr__(self):
        template = '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>'
        fields = (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
        return template % fields

756

757

758

def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm, which finds all the new files that
    were added from merge base up to the top commit and for each file it
    checks if this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 3-tuple: the `branch_copies` built from the copies between
    `base` and `c1`, the `branch_copies` built from the copies between
    `base` and `c2`, and "diverge", a mapping of source name -> sorted list
    of destination names for sources renamed on both sides to entirely
    different destinations.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # {destination: source} mappings from the base to each side
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        # no copy on either side, nothing to compute
        return branch_copies(), branch_copies(), {}

    # invert the mappings: {source: [destinations]} for each side
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # only the destinations shared by both sides are recorded
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    # files added on one side only, in sorted order
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    if repo.ui.debugflag:
        # flatten the destinations so that each copy can be annotated below
        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b" on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    # the directory moves of one side are checked against the files added on
    # the other side, hence the crossed u2/u1 and movewithdir2/movewithdir1
    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge

881

882

883

def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
              merge.manifestmerge() won't care about
    addedfiles: added files on the other side (compared to ctx)

    Returns a `(dirmove, movewithdir)` pair: `dirmove` maps a source
    directory to its destination directory (both with a trailing slash) and
    `movewithdir` maps each added file to its path in the moved directory.
    Both are empty when no directory move is found.
    """
    # generate a directory move map
    ctxdirs = ctx.dirs()
    rejected = set()
    candidates = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        srcdir = pathutil.dirname(src)
        dstdir = pathutil.dirname(dst)
        if srcdir in rejected:
            # already seen to be uninteresting
            continue
        if (srcdir in ctxdirs and dstdir in ctxdirs) or (
            candidates.get(srcdir, dstdir) != dstdir
        ):
            # directory wasn't entirely moved locally, or files from the
            # same directory moved to two different places
            rejected.add(srcdir)
        else:
            # looks good so far
            candidates[srcdir] = dstdir

    for srcdir in rejected:
        candidates.pop(srcdir, None)
    del ctxdirs, rejected

    if not candidates:
        return {}, {}

    dirmove = {
        srcdir + b"/": dstdir + b"/"
        for srcdir, dstdir in pycompat.iteritems(candidates)
    }

    for srcprefix in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n"
            % (srcprefix, dirmove[srcprefix])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfiles:
        if f in fullcopy:
            continue
        for srcprefix in dirmove:
            if not f.startswith(srcprefix):
                continue
            # new file added in a directory that was moved, move it
            moved = dirmove[srcprefix] + f[len(srcprefix) :]
            if moved not in copy:
                movewithdir[f] = moved
                repo.ui.debug(
                    b" pending file src: '%s' -> dst: '%s'\n"
                    % (f, moved)
                )
            # only the first matching directory move is considered
            break

    return dirmove, movewithdir

946

947

948

def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
                    (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns a 3-tuple: branch_copies built from the copies found on the c1
    side, branch_copies built from the copies found on the c2 side, and an
    empty dict (or whatever _fullcopytracing() returns when falling back).
    """

    # a context whose rev() is None is replaced by its first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # walk the first-parent chain from c2 down to base, collecting every
    # file touched on the way
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # copies recorded between base and c2 whose source still exists in c1
    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = [
        f for f in changedfiles if f not in m1 and f in base and f in c2
    ]

    copies1 = {}
    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        # index the files that exist in c1 but not in base by basename and
        # by directory name
        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (the value does not depend on the file, so it is read only once)
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}

1055

1056

1057

def _related(f1, f2):

1057

def _related(f1, f2):

1058

"""return True if f1 and f2 filectx have a common ancestor

1058

"""return True if f1 and f2 filectx have a common ancestor

1059

1060

Walk back to common ancestor to see if the two files originate

1060

Walk back to common ancestor to see if the two files originate

1061

from the same file. Since workingfilectx's rev() is None it messes

1061

from the same file. Since workingfilectx's rev() is None it messes

1062

up the integer comparison logic, hence the pre-step check for

1062

up the integer comparison logic, hence the pre-step check for

1063

None (f1 and f2 can only be workingfilectx's initially).

1063

None (f1 and f2 can only be workingfilectx's initially).

1064

"""

1064

"""

1065

1066

if f1 == f2:

1066

if f1 == f2:

1067

return True # a match

1067

return True # a match

1068

1069

g1, g2 = f1.ancestors(), f2.ancestors()

1069

g1, g2 = f1.ancestors(), f2.ancestors()

1070

try:

1070

try:

1071

f1r, f2r = f1.linkrev(), f2.linkrev()

1071

f1r, f2r = f1.linkrev(), f2.linkrev()

1072

1073

if f1r is None:

1073

if f1r is None:

1074

f1 = next(g1)

1074

f1 = next(g1)

1075

if f2r is None:

1075

if f2r is None:

1076

f2 = next(g2)

1076

f2 = next(g2)

1077

1078

while True:

1078

while True:

1079

f1r, f2r = f1.linkrev(), f2.linkrev()

1079

f1r, f2r = f1.linkrev(), f2.linkrev()

1080

if f1r > f2r:

1080

if f1r > f2r:

1081

f1 = next(g1)

1081

f1 = next(g1)

1082

elif f2r > f1r:

1082

elif f2r > f1r:

1083

f2 = next(g2)

1083

f2 = next(g2)

1084

else: # f1 and f2 point to files in the same linkrev

1084

else: # f1 and f2 point to files in the same linkrev

1085

return f1 == f2 # true if they point to the same file

1085

return f1 == f2 # true if they point to the same file

1086

except StopIteration:

1086

except StopIteration:

1087

return False

1087

return False

1088

1089

1090

def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    new_copies = pathcopies(base, ctx)
    # _filter() prunes new_copies in place against wctx.p1() and wctx
    _filter(wctx.p1(), wctx, new_copies)
    for dst, src in pycompat.iteritems(new_copies):
        wctx[dst].markcopied(src)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import collections
             import os
             from .i18n import _
             from . import (
                 match as matchmod,
                 node,
                 pathutil,
                 pycompat,
                 util,
             )
             from .utils import stringutil
             from .revlogutils import flagutil
             def _filter(src, dst, t):
                 """filters out invalid copies after chaining"""
                 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
                 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
                 # in the following table (not including trivial cases). For example, case 2
                 # is where a file existed in 'src' and remained under that name in 'mid' and
                 # then was renamed between 'mid' and 'dst'.
                 #
                 # case src mid dst result
                 #   1   x   y   -    -
                 #   2   x   y   y   x->y
                 #   3   x   y   x    -
                 #   4   x   y   z   x->z
                 #   5   -   x   y    -
                 #   6   x   x   y   x->y
                 #
                 # _chain() takes care of chaining the copies in 'a' and 'b', but it
                 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
                 # between 5 and 6, so it includes all cases in its result.
                 # Cases 1, 3, and 5 are then removed by _filter().
                 for k, v in list(t.items()):
                     # remove copies from files that didn't exist
                     if v not in src:
                         del t[k]
                     # remove criss-crossed copies
                     elif k in src and v in dst:
                         del t[k]
                     # remove copies to files that were then removed
                     elif k not in dst:
                         del t[k]
             def _chain(prefix, suffix):
                 """compose two copy mappings, 'prefix' followed by 'suffix'

                 Returns a new mapping; every 'suffix' source that is itself a
                 destination in 'prefix' is replaced by its 'prefix' source.
                 """
                 chained = prefix.copy()
                 chained.update(
                     (dest, prefix.get(mid, mid))
                     for dest, mid in pycompat.iteritems(suffix)
                 )
                 return chained
             def _tracefile(fctx, am, basemf):
                 """return file context that is the ancestor of fctx present in ancestor
                 manifest am
                 Note: we used to try and stop after a given limit, however checking if that
                 limit is reached turned out to be very expensive. we are better off
                 disabling that feature."""
                 for f in fctx.ancestors():
                     path = f.path()
                     if am.get(path, None) == f.filenode():
                         return path
                     if basemf and basemf.get(path, None) == f.filenode():
                         return path
             def _dirstatecopies(repo, match=None):
                 ds = repo.dirstate
                 c = ds.copies().copy()
                 for k in list(c):
                     if ds[k] not in b'anm' or (match and not match(k)):
                         del c[k]
                 return c
             def _computeforwardmissing(a, b, match=None):
                 """Computes which files are in b but not a.
                 This is its own function so extensions can easily wrap this call to see what
                 files _forwardcopies is about to process.
                 """
                 ma = a.manifest()
                 mb = b.manifest()
                 return mb.filesnotin(ma, match=match)
             def usechangesetcentricalgo(repo):
                 """Tell whether the changeset-centric copy algorithms should be used

                 True when the repository stores copies in changeset sidedata, or when
                 `experimental.copies.read-from` is set to b'changeset-only' or
                 b'compatibility'.
                 """
                 if repo.filecopiesmode == b'changeset-sidedata':
                     return True
                 return repo.ui.config(b'experimental', b'copies.read-from') in (
                     b'changeset-only',
                     b'compatibility',
                 )
             def _committedforwardcopies(a, b, base, match):
                 """Like _forwardcopies(), but b.rev() cannot be None (working copy)

                 Returns a {destination: source} dict for the files that are in b but
                 not in a and that could be traced back to a path in a's manifest (or
                 in base's manifest when base is given).
                 """
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 repo = a._repo

                 if usechangesetcentricalgo(repo):
                     return _changesetforwardcopies(a, b, match)

                 ui = repo.ui
                 debug = ui.debugflag and ui.configbool(b'devel', b'debug.copies')
                 dbg = ui.debug
                 if debug:
                     dbg(b'debug.copies:    looking into rename from %s to %s\n' % (a, b))

                 am = a.manifest()
                 basemf = base.manifest() if base is not None else None

                 # Only the origin of new files is searched. Finding where old files
                 # went is too expensive, so a sequence like 'hg rm b; hg cp a b' can
                 # be missed.
                 renames = {}

                 # Comparing two whole manifests is costly when they are large. In the
                 # common case of a commit compared with its only parent (rebase,
                 # histedit, ...) the search is narrowed to the files the commit
                 # touched. Merge commits are excluded because their ctx.files() is
                 # not correct for this comparison.
                 forwardmissingmatch = match
                 if b.p1() == a and b.p2().node() == node.nullid:
                     touched = matchmod.exact(b.files())
                     forwardmissingmatch = matchmod.intersectmatchers(match, touched)
                 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

                 ancestrycontext = repo.changelog.ancestors([b.rev()], inclusive=True)

                 if debug:
                     dbg(b'debug.copies:      missing files to search: %d\n' % len(missing))

                 for fname in sorted(missing):
                     if debug:
                         dbg(b'debug.copies:        tracing file: %s\n' % fname)
                     fctx = b[fname]
                     fctx._ancestrycontext = ancestrycontext

                     if debug:
                         start = util.timer()
                     origin = _tracefile(fctx, am, basemf)
                     if origin:
                         if debug:
                             dbg(b'debug.copies:          rename of: %s\n' % origin)
                         renames[fname] = origin
                     if debug:
                         elapsed = util.timer() - start
                         dbg(b'debug.copies:          time: %f seconds\n' % elapsed)
                 return renames
             def _revinfo_getter(repo):
                 """return a function that returns the following data given a <rev>

                 * p1: revision number of first parent
                 * p2: revision number of second parent
                 * changes: a ChangingFiles object, or None when the revision is not
                   flagged as carrying copy information
                 """
                 cl = repo.changelog
                 parents = cl.parentrevs
                 flags = cl.flags
                 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
                 changelogrevision = cl.changelogrevision

                 # A small cache to avoid doing the work twice for merges
                 #
                 # In the vast majority of cases, if we ask information for a revision
                 # about 1 parent, we'll later ask it for the other. So it makes sense
                 # to keep the information around when reaching the first parent of a
                 # merge and to drop it after it was provided for the second parent.
                 #
                 # There are cases where only one parent of the merge will be walked.
                 # It happens when the "destination" of the copy tracing is descendant
                 # from a new root, not common with the "source". In that case, we will
                 # only walk through merge parents that are descendants of changesets
                 # common between "source" and "destination".
                 #
                 # With the current implementation, if such changesets have copy
                 # information, we'll keep them in memory until the end of
                 # _changesetforwardcopies. We don't expect the case to be frequent
                 # enough to matter.
                 #
                 # In addition, it would be possible to reach a pathological case,
                 # where many first parents are met before any second parent is
                 # reached. In that case the cache could grow. If this ever becomes an
                 # issue one can safely introduce a maximum cache size. This would
                 # trade extra CPU/IO time to save memory.
                 merge_caches = {}

                 def revinfo(rev):
                     # a cached entry is handed out once and dropped; look it up
                     # before doing any changelog access
                     cached = merge_caches.pop(rev, None)
                     if cached is not None:
                         return cached
                     p1, p2 = parents(rev)
                     changes = None
                     if flags(rev) & HASCOPIESINFO:
                         changes = changelogrevision(rev).changes
                     value = (p1, p2, changes)
                     if p1 != node.nullrev and p2 != node.nullrev:
                         # XXX in some cases we over-cache, ignored for now
                         merge_caches[rev] = value
                     return value

                 return revinfo
             def _changesetforwardcopies(a, b, match):
                 """compute the copies from a to b using changeset-stored copy data

                 Returns an empty dict when a is the null revision or the same
                 revision as b, or when no common revision exists to track copies
                 from. Otherwise the revisions to walk are collected and handed to
                 the combine function matching the repository's copy storage mode.
                 """
                 if a.rev() in (node.nullrev, b.rev()):
                     return {}

                 repo = a.repo().unfiltered()
                 cl = repo.changelog
                 isancestor = cl.isancestorrev  # XXX we should add caching to this.
                 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
                 mrset = set(missingrevs)

                 # parent -> children mapping over the missing revisions, plus the
                 # "roots": parents that are not themselves missing
                 children = {}
                 roots = set()
                 for rev in missingrevs:
                     for parent in cl.parentrevs(rev):
                         if parent == node.nullrev:
                             continue
                         children.setdefault(parent, []).append(rev)
                         if parent not in mrset:
                             roots.add(parent)
                 if not roots:
                     # no common revision to track copies from
                     return {}

                 from_head = set(
                     cl.reachableroots(min(roots), [b.rev()], list(roots), includepath=True)
                 )
                 iterrevs = (from_head & mrset) | roots
                 iterrevs.remove(b.rev())
                 revs = sorted(iterrevs)

                 if repo.filecopiesmode == b'changeset-sidedata':
                     revinfo = _revinfo_getter(repo)
                     combine = _combine_changeset_copies
                 else:
                     revinfo = _revinfo_getter_extra(repo)
                     combine = _combine_changeset_copies_extra
                 return combine(revs, children, b.rev(), revinfo, match, isancestor)
             def _combine_changeset_copies(
                 revs, children, targetrev, revinfo, match, isancestor
             ):
                 """combine the copies information for each item of revs

                 revs: sorted iterable of revision to visit
                 children: a {parent: [children]} mapping.
                 targetrev: the final copies destination revision (not in revs)
                 revinfo(rev): a function that returns (p1, p2, changes); `changes`
                               may be None, otherwise its `copied_from_p1`,
                               `copied_from_p2` and `removed` attributes are read here
                 match: a matcher
                 isancestor: passed through to _merge_copies_dict()

                 While walking, each revision maps a destination to a
                 (revision, source) pair; a source of None records a removal.

                 It returns the aggregated copies information for `targetrev` as a
                 {destination: source} dict, without the removal entries.
                 """
                 all_copies = {}
                 alwaysmatch = match.always()
                 for r in revs:
                     copies = all_copies.pop(r, None)
                     if copies is None:
                         # this is a root
                         copies = {}
                     for i, c in enumerate(children[r]):
                         p1, p2, changes = revinfo(c)
                         childcopies = {}
                         # pick the copy data of `c` relative to the parent we come from
                         if r == p1:
                             parent = 1
                             if changes is not None:
                                 childcopies = changes.copied_from_p1
                         else:
                             assert r == p2
                             parent = 2
                             if changes is not None:
                                 childcopies = changes.copied_from_p2
                         if not alwaysmatch:
                             childcopies = {
                                 dst: src for dst, src in childcopies.items() if match(dst)
                             }
                         newcopies = copies
                         if childcopies:
                             newcopies = copies.copy()
                             for dest, source in pycompat.iteritems(childcopies):
                                 # chain with a copy already known for the source, unless
                                 # that entry records a removal
                                 prev = copies.get(source)
                                 if prev is not None and prev[1] is not None:
                                     source = prev[1]
                                 newcopies[dest] = (c, source)
                             assert newcopies is not copies
                         if changes is not None:
                             for f in changes.removed:
                                 if f in newcopies:
                                     if newcopies is copies:
                                         # copy on write to avoid affecting potential other
                                         # branches.  when there are no other branches, this
                                         # could be avoided.
                                         newcopies = copies.copy()
                                     # a None source marks the destination as removed in `c`
                                     newcopies[f] = (c, None)
                         othercopies = all_copies.get(c)
                         if othercopies is None:
                             all_copies[c] = newcopies
                         else:
                             # we are the second parent to work on c, we need to merge our
                             # work with the other.
                             #
                             # In case of conflict, parent 1 take precedence over parent 2.
                             # This is an arbitrary choice made anew when implementing
                             # changeset based copies. It was made without regards with
                             # potential filelog related behavior.
                             if parent == 1:
                                 _merge_copies_dict(
                                     othercopies, newcopies, isancestor, changes
                                 )
                             else:
                                 _merge_copies_dict(
                                     newcopies, othercopies, isancestor, changes
                                 )
                                 all_copies[c] = newcopies
                 # keep only real copies: entries with a None source are removals
                 final_copies = {}
                 for dest, (tt, source) in all_copies[targetrev].items():
                     if source is not None:
                         final_copies[dest] = source
                 return final_copies
             def _merge_copies_dict(minor, major, isancestor, changes):
                 """merge two copies-mapping together, minor and major
                 In case of conflict, value from "major" will be picked.
                 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
                                                     ancestors of `high_rev`,
                 - `ismerged(path)`: callable return True if `path` have been merged in the
                                     current revision,
                 """
                 for dest, value in major.items():
                     other = minor.get(dest)
                     if other is None:
                         minor[dest] = value
                     else:
                         new_tt = value[0]
                         other_tt = other[0]
                         if value[1] == other[1]:
                             continue
                         # content from "major" wins, unless it is older
                         # than the branch point or there is a merge
                         if new_tt == other_tt:
                             minor[dest] = value
                         elif (
                             changes is not None
                             and value[1] is None
                             and dest in changes.salvaged
                         ):
                             pass
                         elif (
                             changes is not None
                             and other[1] is None
                             and dest in changes.salvaged
                         ):
                             minor[dest] = value
-                        elif not isancestor(new_tt, other_tt):
+                        elif changes is not None and dest in changes.merged:
                             minor[dest] = value
-                        elif changes is not None and dest in changes.merged:
+                        elif not isancestor(new_tt, other_tt):
                             minor[dest] = value
             def _revinfo_getter_extra(repo):
                 """return a function that returns multiple data given a <rev>

                 The callable handed back takes a revision and produces, in order:

                 * p1: revision number of first parent
                 * p2: revision number of second parent
                 * p1copies: mapping of copies from p1
                 * p2copies: mapping of copies from p2
                 * removed: the value of ``ctx.filesremoved()`` for that revision
                 * ismerged: a callback to know if file was merged in that revision
                 """
                 parentrevs = repo.changelog.parentrevs

                 def get_ismerged(rev):
                     ctx = repo[rev]

                     def ismerged(path):
                         # a file the changeset did not touch cannot have been merged
                         if path not in ctx.files():
                             return False
                         fctx = ctx[path]
                         fileparents = fctx._filelog.parents(fctx._filenode)
                         # merged means at least two filelog parents are not null
                         realparents = [n for n in fileparents if n != node.nullid]
                         return len(realparents) >= 2

                     return ismerged

                 def revinfo(rev):
                     first, second = parentrevs(rev)
                     ctx = repo[rev]
                     fromfirst, fromsecond = ctx._copies
                     dropped = ctx.filesremoved()
                     return (
                         first,
                         second,
                         fromfirst,
                         fromsecond,
                         dropped,
                         get_ismerged(rev),
                     )

                 return revinfo
             def _combine_changeset_copies_extra(
                 revs, children, targetrev, revinfo, match, isancestor
             ):
                 """version of `_combine_changeset_copies` that works with the Google
                 specific "extra" based storage for copy information

                 Iterates over ``revs`` in the order given and pushes the copy data
                 accumulated for each revision to its children. When a child is reached
                 a second time (through its other parent) the two accumulated
                 dictionaries are merged with `_merge_copies_dict_extra`.

                 * revs: iterable of revisions to process
                 * children: indexable by revision, gives the children to visit
                 * targetrev: revision whose accumulated copies are returned
                 * revinfo: callable returning ``(p1, p2, p1copies, p2copies, removed,
                   ismerged)`` for a revision
                 * match: matcher; unless ``match.always()`` is true, copies whose
                   destination does not match are ignored
                 * isancestor: callable forwarded to `_merge_copies_dict_extra`

                 Internally each destination maps to a ``(rev, source)`` pair, where a
                 ``None`` source records that the file was removed in ``rev``.

                 Returns a ``{dest: source}`` dict holding the entries accumulated for
                 ``targetrev`` whose source is not None.
                 """
                 # rev -> {dest: (rev that recorded the entry, source or None)}
                 all_copies = {}
                 alwaysmatch = match.always()
                 for r in revs:
                     copies = all_copies.pop(r, None)
                     if copies is None:
                         # this is a root
                         copies = {}
                     for i, c in enumerate(children[r]):
                         p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
                         # pick the copy data of `c` that is relative to `r`
                         if r == p1:
                             parent = 1
                             childcopies = p1copies
                         else:
                             assert r == p2
                             parent = 2
                             childcopies = p2copies
                         if not alwaysmatch:
                             childcopies = {
                                 dst: src for dst, src in childcopies.items() if match(dst)
                             }
                         # share the parent's dict until something has to change
                         newcopies = copies
                         if childcopies:
                             newcopies = copies.copy()
                             for dest, source in pycompat.iteritems(childcopies):
                                 # chain with an earlier copy: when `source` is itself a
                                 # recorded destination with a live source, use that one
                                 prev = copies.get(source)
                                 if prev is not None and prev[1] is not None:
                                     source = prev[1]
                                 newcopies[dest] = (c, source)
                             assert newcopies is not copies
                         for f in removed:
                             if f in newcopies:
                                 if newcopies is copies:
                                     # copy on write to avoid affecting potential other
                                     # branches.  when there are no other branches, this
                                     # could be avoided.
                                     newcopies = copies.copy()
                                 # a None source marks the file as removed in `c`
                                 newcopies[f] = (c, None)
                         othercopies = all_copies.get(c)
                         if othercopies is None:
                             # first parent to reach `c`: just register our data
                             all_copies[c] = newcopies
                         else:
                             # we are the second parent to work on c, we need to merge our
                             # work with the other.
                             #
                             # In case of conflict, parent 1 takes precedence over parent 2.
                             # This is an arbitrary choice made anew when implementing
                             # changeset based copies. It was made without regard to
                             # potential filelog related behavior.
                             if parent == 1:
                                 # `othercopies` is updated in place and stays the dict
                                 # registered for `c`
                                 _merge_copies_dict_extra(
                                     othercopies, newcopies, isancestor, ismerged
                                 )
                             else:
                                 # NOTE(review): `newcopies` may still be the very dict
                                 # bound to `copies` here (no copy-on-write happened
                                 # above); the in-place merge would then also be visible
                                 # to any other child sharing it -- confirm this is
                                 # intended.
                                 _merge_copies_dict_extra(
                                     newcopies, othercopies, isancestor, ismerged
                                 )
                                 all_copies[c] = newcopies
                 # drop the bookkeeping revision and the removal markers
                 final_copies = {}
                 for dest, (tt, source) in all_copies[targetrev].items():
                     if source is not None:
                         final_copies[dest] = source
                 return final_copies
             def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
                 """version of `_merge_copies_dict` that works with the Google
                 specific "extra" based storage for copy information"""
                 for dest, value in major.items():
                     other = minor.get(dest)
                     if other is None:
                         minor[dest] = value
                     else:
                         new_tt = value[0]
                         other_tt = other[0]
                         if value[1] == other[1]:
                             continue
                         # content from "major" wins, unless it is older
                         # than the branch point or there is a merge
                         if (
                             new_tt == other_tt
                             or not isancestor(new_tt, other_tt)
                             or ismerged(dest)
                         ):
                             minor[dest] = value
             def _forwardcopies(a, b, base=None, match=None):
                 """find {dst@b: src@a} copy mapping where a is an ancestor of b

                 ``base`` defaults to ``a``. ``match`` is run through the repository's
                 ``narrowmatch()`` before being used. When ``b`` is the working copy
                 (``b.rev()`` is None) the committed copies up to ``b.p1()`` are chained
                 with the copies recorded in the dirstate.
                 """
                 base = a if base is None else base
                 match = a.repo().narrowmatch(match)

                 if b.rev() is not None:
                     return _committedforwardcopies(a, b, base, match)

                 # working copy: combine committed copies with the dirstate ones
                 committed = _committedforwardcopies(a, b.p1(), base, match)
                 return _chain(committed, _dirstatecopies(b._repo, match))
             def _backwardrenames(a, b, match):
                 """find renames going from a back to b

                 Reverses the mapping produced by ``_forwardcopies(b, a)``, keeping only
                 entries whose source no longer exists in ``a`` and, when ``match`` is
                 set, whose source is matched. Returns an empty dict when
                 ``experimental.copytrace`` is ``off``.
                 """
                 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
                     return {}

                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
                 # We don't want to pass in "match" here, since that would filter
                 # the destination by it. Since we're reversing the copies, we want
                 # to filter the source instead.
                 forward = _forwardcopies(b, a)
                 renames = {}
                 for dst, src in sorted(pycompat.iteritems(forward)):
                     wanted = not match or match(src)
                     # a source that is still present in `a` was copied, not renamed
                     if wanted and src not in a:
                         renames[src] = dst
                 return renames
             def pathcopies(x, y, match=None):
                 """find {dst@y: src@x} copy mapping for directed compare

                 Depending on how ``x`` and ``y`` relate, the copies come from the
                 dirstate alone, from a forward search, from a backward search, or from
                 a backward search to their common ancestor chained with a forward
                 search from it. The result is passed through ``_filter`` before being
                 returned. An empty dict is returned when ``x == y`` or either context
                 is falsy.
                 """
                 repo = x._repo
                 ui = repo.ui
                 debug = ui.debugflag and ui.configbool(b'devel', b'debug.copies')
                 if debug:
                     ui.debug(
                         b'debug.copies: searching copies from %s to %s\n' % (x, y)
                     )

                 if x == y or not x or not y:
                     return {}

                 if y.rev() is None and x == y.p1():
                     if debug:
                         ui.debug(b'debug.copies: search mode: dirstate\n')
                     # short-circuit to avoid issues with merge states
                     return _dirstatecopies(repo, match)

                 anc = y.ancestor(x)
                 if anc == x:
                     if debug:
                         ui.debug(b'debug.copies: search mode: forward\n')
                     found = _forwardcopies(x, y, match=match)
                 elif anc == y:
                     if debug:
                         ui.debug(b'debug.copies: search mode: backward\n')
                     found = _backwardrenames(x, y, match=match)
                 else:
                     if debug:
                         ui.debug(b'debug.copies: search mode: combined\n')
                     # only hand a base over when there is a real common ancestor
                     base = x if anc.rev() != node.nullrev else None
                     backward = _backwardrenames(x, anc, match=match)
                     forward = _forwardcopies(anc, y, base, match=match)
                     found = _chain(backward, forward)

                 _filter(x, y, found)
                 return found
             def mergecopies(repo, c1, c2, base):
                 """
                 Finds moves and copies between context c1 and c2 that are relevant for
                 merging. 'base' will be used as the merge base.

                 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
                 files that were moved/copied in one merge parent and modified in another.
                 For example:

                 o          ---> 4 another commit
                 |
                 |   o      ---> 3 commit that modifies a.txt
                 |  /
                 o /        ---> 2 commit that moves a.txt to b.txt
                 |/
                 o          ---> 1 merge base

                 If we try to rebase revision 3 on revision 4, since there is no a.txt in
                 revision 4, and if the user has copytrace disabled, we print the
                 following message:

                 ```other changed <file> which local deleted```

                 Returns a 3-tuple where:

                 the first two items are instances of branch_copies (one per side)

                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.

                 This function calls different copytracing algorithms based on config.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return branch_copies(), branch_copies(), {}

                 narrowmatch = c1.repo().narrowmatch()

                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     fromdirstate = branch_copies(_dirstatecopies(repo, narrowmatch))
                     return fromdirstate, branch_copies(), {}

                 # Copy trace disabling is explicitly below the node == p1 logic above
                 # because the logic above is required for a simple copy to be kept
                 # across a rebase.
                 mode = repo.ui.config(b'experimental', b'copytrace')
                 # stringutil.parsebool() returns None when it is unable to parse the
                 # value, so only an explicit false value turns copytracing off
                 if stringutil.parsebool(mode) is False:
                     return branch_copies(), branch_copies(), {}

                 # The heuristics don't make sense when we need changeset-centric algos
                 if usechangesetcentricalgo(repo):
                     return _fullcopytracing(repo, c1, c2, base)

                 # Do full copytracing if only non-public revisions are involved as
                 # that will be fast enough and will also cover the copies which could
                 # be missed by heuristics
                 wantheuristics = mode == b'heuristics'
                 if wantheuristics and not _isfullcopytraceable(repo, c1, base):
                     return _heuristicscopytracing(repo, c1, c2, base)
                 return _fullcopytracing(repo, c1, c2, base)
             def _isfullcopytraceable(repo, c1, base):
                 """Tell whether the full copytrace algorithm is cheap enough to use.

                 Returns True when both ``c1`` (its first parent is used instead when
                 ``c1`` is the working copy) and ``base`` are mutable and the number of
                 changesets in ``base::c1`` is below
                 `experimental.copytrace.sourcecommitlimit`. Returns False otherwise.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()

                 if not (c1.mutable() and base.mutable()):
                     return False

                 limit = repo.ui.configint(
                     b'experimental', b'copytrace.sourcecommitlimit'
                 )
                 between = repo.revs(b'%d::%d', base.rev(), c1.rev())
                 return len(between) < limit
             def _checksinglesidecopies(
                 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
             ):
                 if src not in m2:
                     # deleted on side 2
                     if src not in m1:
                         # renamed on side 1, deleted on side 2
                         renamedelete[src] = dsts1
                 elif src not in mb:
                     # Work around the "short-circuit to avoid issues with merge states"
                     # thing in pathcopies(): pathcopies(x, y) can return a copy where the
                     # destination doesn't exist in y.
                     pass
                 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
                     return
                 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
                     # modified on side 2
                     for dst in dsts1:
                         copy[dst] = src
             class branch_copies(object):
                 """Information about copies made on one side of a merge/graft.

                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.

                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.

                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.

                 "dirmove" is a mapping of detected source dir -> destination dir renames.
                 This is needed for handling changes to new files previously grafted into
                 renamed directories.

                 Every argument left as None is replaced by a fresh empty dict.
                 """

                 def __init__(
                     self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
                 ):
                     if copy is None:
                         copy = {}
                     if renamedelete is None:
                         renamedelete = {}
                     if dirmove is None:
                         dirmove = {}
                     if movewithdir is None:
                         movewithdir = {}
                     self.copy = copy
                     self.renamedelete = renamedelete
                     self.dirmove = dirmove
                     self.movewithdir = movewithdir

                 def __repr__(self):
                     fields = (
                         self.copy,
                         self.renamedelete,
                         self.dirmove,
                         self.movewithdir,
                     )
                     template = '<branch_copies\n  copy=%r\n  renamedelete=%r\n  dirmove=%r\n  movewithdir=%r\n>'
                     return template % fields
             def _fullcopytracing(repo, c1, c2, base):
                 """ The full copytracing algorithm which finds all the new files that were
                 added from merge base up to the top commit and for each file it checks if
                 this file was copied from another file.

                 This is pretty slow when a lot of changesets are involved but will track all
                 the copies.

                 Returns ``(branch_copies1, branch_copies2, diverge)``: one
                 `branch_copies` instance per side (c1 first, c2 second) and a dict
                 mapping a source name to the sorted list of destinations it was
                 divergently renamed to.
                 """
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 mb = base.manifest()
                 # {dst: src} copies between the base and each side
                 copies1 = pathcopies(base, c1)
                 copies2 = pathcopies(base, c2)
                 if not (copies1 or copies2):
                     return branch_copies(), branch_copies(), {}
                 # {src: [dst, ...]} view of the same information
                 inversecopies1 = {}
                 inversecopies2 = {}
                 for dst, src in copies1.items():
                     inversecopies1.setdefault(src, []).append(dst)
                 for dst, src in copies2.items():
                     inversecopies2.setdefault(src, []).append(dst)
                 copy1 = {}
                 copy2 = {}
                 diverge = {}
                 renamedelete1 = {}
                 renamedelete2 = {}
                 allsources = set(inversecopies1) | set(inversecopies2)
                 for src in allsources:
                     dsts1 = inversecopies1.get(src)
                     dsts2 = inversecopies2.get(src)
                     if dsts1 and dsts2:
                         # copied/renamed on both sides
                         if src not in m1 and src not in m2:
                             # renamed on both sides
                             dsts1 = set(dsts1)
                             dsts2 = set(dsts2)
                             # If there's some overlap in the rename destinations, we
                             # consider it not divergent. For example, if side 1 copies 'a'
                             # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                             # and 'd' and deletes 'a'.
                             if dsts1 & dsts2:
                                 for dst in dsts1 & dsts2:
                                     copy1[dst] = src
                                     copy2[dst] = src
                             else:
                                 diverge[src] = sorted(dsts1 | dsts2)
                         elif src in m1 and src in m2:
                             # copied on both sides
                             dsts1 = set(dsts1)
                             dsts2 = set(dsts2)
                             for dst in dsts1 & dsts2:
                                 copy1[dst] = src
                                 copy2[dst] = src
                         # TODO: Handle cases where it was renamed on one side and copied
                         # on the other side
                     elif dsts1:
                         # copied/renamed only on side 1
                         _checksinglesidecopies(
                             src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
                         )
                     elif dsts2:
                         # copied/renamed only on side 2
                         _checksinglesidecopies(
                             src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
                         )
                 # find interesting file sets from manifests
                 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
                 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
                 # files added on exactly one side
                 u1 = sorted(addedinm1 - addedinm2)
                 u2 = sorted(addedinm2 - addedinm1)
                 header = b"  unmatched files in %s"
                 if u1:
                     repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
                 if u2:
                     repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))
                 if repo.ui.debugflag:
                     # these sets only exist to annotate the debug listing below
                     renamedeleteset = set()
                     divergeset = set()
                     for dsts in diverge.values():
                         divergeset.update(dsts)
                     for dsts in renamedelete1.values():
                         renamedeleteset.update(dsts)
                     for dsts in renamedelete2.values():
                         renamedeleteset.update(dsts)
                     repo.ui.debug(
                         b"  all copies found (* = to merge, ! = divergent, "
                         b"% = renamed and deleted):\n"
                     )
                     for side, copies in ((b"local", copies1), (b"remote", copies2)):
                         if not copies:
                             continue
                         repo.ui.debug(b"   on %s side:\n" % side)
                         for f in sorted(copies):
                             note = b""
                             if f in copy1 or f in copy2:
                                 note += b"*"
                             if f in divergeset:
                                 note += b"!"
                             if f in renamedeleteset:
                                 note += b"%"
                             repo.ui.debug(
                                 b"    src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                             )
                     del renamedeleteset
                     del divergeset
                 repo.ui.debug(b"  checking for directory renames\n")
                 # each side's directory moves are checked against the files that were
                 # added only on the *other* side
                 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
                 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)
                 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
                 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
                 return branch_copies1, branch_copies2, diverge
             def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
                 """Finds moved directories and files that should move with them.

                 ctx: the context for one of the sides
                 copy: files copied on the same side (as ctx)
                 fullcopy: files copied on the same side (as ctx), including those that
                           merge.manifestmerge() won't care about
                 addedfiles: added files on the other side (compared to ctx)

                 Returns a ``(dirmove, movewithdir)`` pair of dicts; both are empty when
                 no directory move was detected. ``dirmove`` keys and values end with
                 a slash.
                 """
                 knowndirs = ctx.dirs()
                 rejected = set()
                 candidates = {}

                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in pycompat.iteritems(fullcopy):
                     srcdir = pathutil.dirname(src)
                     dstdir = pathutil.dirname(dst)
                     if srcdir in rejected:
                         # already seen to be uninteresting
                         continue
                     # either the directory wasn't entirely moved locally, or files
                     # from the same directory moved to two different places
                     if (srcdir in knowndirs and dstdir in knowndirs) or (
                         candidates.get(srcdir, dstdir) != dstdir
                     ):
                         rejected.add(srcdir)
                     else:
                         # looks good so far
                         candidates[srcdir] = dstdir

                 for srcdir in rejected:
                     candidates.pop(srcdir, None)

                 if not candidates:
                     return {}, {}

                 # generate the directory move map, with trailing slashes
                 dirmove = {}
                 for srcdir, dstdir in pycompat.iteritems(candidates):
                     dirmove[srcdir + b"/"] = dstdir + b"/"

                 for srcprefix, dstprefix in pycompat.iteritems(dirmove):
                     repo.ui.debug(
                         b"   discovered dir src: '%s' -> dst: '%s'\n"
                         % (srcprefix, dstprefix)
                     )

                 movewithdir = {}
                 # check unaccounted nonoverlapping files against directory moves
                 for f in addedfiles:
                     if f in fullcopy:
                         continue
                     for srcprefix in dirmove:
                         if not f.startswith(srcprefix):
                             continue
                         # new file added in a directory that was moved, move it
                         target = dirmove[srcprefix] + f[len(srcprefix) :]
                         if target not in copy:
                             movewithdir[f] = target
                             repo.ui.debug(
                                 b"   pending file src: '%s' -> dst: '%s'\n"
                                 % (f, target)
                             )
                         # only the first matching directory is considered
                         break

                 return dirmove, movewithdir
             def _heuristicscopytracing(repo, c1, c2, base):
                 """ Fast copytracing using filename heuristics

                 Assumes that moves or renames are of following two types:

                 1) Inside a directory only (same directory name but different filenames)
                 2) Move from one directory to another
                                 (same filenames but different directory names)

                 Works only when there are no merge commits in the "source branch".
                 Source branch is commits from base up to c2 not including base.

                 If merge is involved it fallbacks to _fullcopytracing(). The same
                 fallback is used when base is not an ancestor of c2.

                 Can be used by setting the following config:

                     [experimental]
                     copytrace = heuristics

                 In some cases the copy/move candidates found by heuristics can be very large
                 in number and that will make the algorithm slow. The number of possible
                 candidates to check can be limited by using the config
                 `experimental.copytrace.movecandidateslimit` which defaults to 100.

                 Returns ``(branch_copies(copies1), branch_copies(copies2), {})`` unless
                 one of the fallbacks above is taken.
                 """
                 # work on committed contexts: replace a working copy by its parent
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c2.rev() is None:
                     c2 = c2.p1()
                 changedfiles = set()
                 m1 = c1.manifest()
                 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
                     # If base is not in c2 branch, we switch to fullcopytracing
                     repo.ui.debug(
                         b"switching to full copytracing as base is not "
                         b"an ancestor of c2\n"
                     )
                     return _fullcopytracing(repo, c1, c2, base)
                 # walk first parents from c2 down to base, collecting touched files
                 ctx = c2
                 while ctx != base:
                     if len(ctx.parents()) == 2:
                         # To keep things simple let's not handle merges
                         repo.ui.debug(b"switching to full copytracing because of merges\n")
                         return _fullcopytracing(repo, c1, c2, base)
                     changedfiles.update(ctx.files())
                     ctx = ctx.p1()
                 # copies made on the c2 side whose source still exists in c1
                 copies2 = {}
                 cp = _forwardcopies(base, c2)
                 for dst, src in pycompat.iteritems(cp):
                     if src in m1:
                         copies2[dst] = src
                 # file is missing if it isn't present in the destination, but is present in
                 # the base and present in the source.
                 # Presence in the base is important to exclude added files, presence in the
                 # source is important to exclude removed files.
                 filt = lambda f: f not in m1 and f in base and f in c2
                 missingfiles = [f for f in changedfiles if filt(f)]
                 # maps a candidate file in c1 to the missing file it is related to
                 copies1 = {}
                 if missingfiles:
                     # index the files added in c1 (relative to base) by basename and
                     # by directory name
                     basenametofilename = collections.defaultdict(list)
                     dirnametofilename = collections.defaultdict(list)
                     for f in m1.filesnotin(base.manifest()):
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         basenametofilename[basename].append(f)
                         dirnametofilename[dirname].append(f)
                     for f in missingfiles:
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         samebasename = basenametofilename[basename]
                         samedirname = dirnametofilename[dirname]
                         movecandidates = samebasename + samedirname
                         # f is guaranteed to be present in c2, that's why
                         # c2.filectx(f) won't fail
                         f2 = c2.filectx(f)
                         # we can have a lot of candidates which can slow down the heuristics
                         # config value to limit the number of candidates moves to check
                         maxcandidates = repo.ui.configint(
                             b'experimental', b'copytrace.movecandidateslimit'
                         )
                         if len(movecandidates) > maxcandidates:
                             repo.ui.status(
                                 _(
                                     b"skipping copytracing for '%s', more "
                                     b"candidates than the limit: %d\n"
                                 )
                                 % (f, len(movecandidates))
                             )
                             continue
                         for candidate in movecandidates:
                             f1 = c1.filectx(candidate)
                             if _related(f1, f2):
                                 # if there are a few related copies then we'll merge
                                 # changes into all of them. This matches the behaviour
                                 # of upstream copytracing
                                 copies1[candidate] = f
                 return branch_copies(copies1), branch_copies(copies2), {}
             def _related(f1, f2):
                 """return True if f1 and f2 filectx have a common ancestor
                 Walk back to common ancestor to see if the two files originate
                 from the same file. Since workingfilectx's rev() is None it messes
                 up the integer comparison logic, hence the pre-step check for
                 None (f1 and f2 can only be workingfilectx's initially).
                 """
                 if f1 == f2:
                     return True  # a match
                 g1, g2 = f1.ancestors(), f2.ancestors()
                 try:
                     f1r, f2r = f1.linkrev(), f2.linkrev()
                     if f1r is None:
                         f1 = next(g1)
                     if f2r is None:
                         f2 = next(g2)
                     while True:
                         f1r, f2r = f1.linkrev(), f2.linkrev()
                         if f1r > f2r:
                             f1 = next(g1)
                         elif f2r > f1r:
                             f2 = next(g2)
                         else:  # f1 and f2 point to files in the same linkrev
                             return f1 == f2  # true if they point to the same file
                 except StopIteration:
                     return False
             def graftcopies(wctx, ctx, base):
                 """reproduce copies between base and ctx in the wctx

                 Unlike mergecopies(), this function will only consider copies between base
                 and ctx; it will ignore copies between base and wctx. Also unlike
                 mergecopies(), this function will apply copies to the working copy (instead
                 of just returning information about the copies). That makes it cheaper
                 (especially in the common case of base==ctx.p1()) and useful also when
                 experimental.copytrace=off.

                 merge.update() will have already marked most copies, but it will only
                 mark copies if it thinks the source files are related (see
                 merge._related()). It will also not mark copies if the file wasn't modified
                 on the local side. This function adds the copies that were "missed"
                 by merge.update().

                 Nothing is returned; each remaining copy is recorded through
                 ``wctx[dst].markcopied(src)``.
                 """
                 grafted = pathcopies(base, ctx)
                 # drop the copies that are not valid between wctx's parent and wctx
                 wparent = wctx.p1()
                 _filter(wparent, wctx, grafted)
                 for dst, src in pycompat.iteritems(grafted):
                     wctx[dst].markcopied(src)