##// END OF EJS Templates
copies: avoid materializing a full directory map during copy tracing...
Kyle Lippincott -
r46632:b9588ff9 default
parent child Browse files
Show More
@@ -1,1149 +1,1148 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import os
11 import os
12
12
13 from .i18n import _
13 from .i18n import _
14
14
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 policy,
20 policy,
21 pycompat,
21 pycompat,
22 util,
22 util,
23 )
23 )
24
24
25
25
26 from .utils import stringutil
26 from .utils import stringutil
27
27
28 from .revlogutils import flagutil
28 from .revlogutils import flagutil
29
29
30 rustmod = policy.importrust("copy_tracing")
30 rustmod = policy.importrust("copy_tracing")
31
31
32
32
33 def _filter(src, dst, t):
33 def _filter(src, dst, t):
34 """filters out invalid copies after chaining"""
34 """filters out invalid copies after chaining"""
35
35
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 # in the following table (not including trivial cases). For example, case 2
38 # in the following table (not including trivial cases). For example, case 2
39 # is where a file existed in 'src' and remained under that name in 'mid' and
39 # is where a file existed in 'src' and remained under that name in 'mid' and
40 # then was renamed between 'mid' and 'dst'.
40 # then was renamed between 'mid' and 'dst'.
41 #
41 #
42 # case src mid dst result
42 # case src mid dst result
43 # 1 x y - -
43 # 1 x y - -
44 # 2 x y y x->y
44 # 2 x y y x->y
45 # 3 x y x -
45 # 3 x y x -
46 # 4 x y z x->z
46 # 4 x y z x->z
47 # 5 - x y -
47 # 5 - x y -
48 # 6 x x y x->y
48 # 6 x x y x->y
49 #
49 #
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 # between 5 and 6, so it includes all cases in its result.
52 # between 5 and 6, so it includes all cases in its result.
53 # Cases 1, 3, and 5 are then removed by _filter().
53 # Cases 1, 3, and 5 are then removed by _filter().
54
54
55 for k, v in list(t.items()):
55 for k, v in list(t.items()):
56 # remove copies from files that didn't exist
56 # remove copies from files that didn't exist
57 if v not in src:
57 if v not in src:
58 del t[k]
58 del t[k]
59 # remove criss-crossed copies
59 # remove criss-crossed copies
60 elif k in src and v in dst:
60 elif k in src and v in dst:
61 del t[k]
61 del t[k]
62 # remove copies to files that were then removed
62 # remove copies to files that were then removed
63 elif k not in dst:
63 elif k not in dst:
64 del t[k]
64 del t[k]
65
65
66
66
67 def _chain(prefix, suffix):
67 def _chain(prefix, suffix):
68 """chain two sets of copies 'prefix' and 'suffix'"""
68 """chain two sets of copies 'prefix' and 'suffix'"""
69 result = prefix.copy()
69 result = prefix.copy()
70 for key, value in pycompat.iteritems(suffix):
70 for key, value in pycompat.iteritems(suffix):
71 result[key] = prefix.get(value, value)
71 result[key] = prefix.get(value, value)
72 return result
72 return result
73
73
74
74
75 def _tracefile(fctx, am, basemf):
75 def _tracefile(fctx, am, basemf):
76 """return file context that is the ancestor of fctx present in ancestor
76 """return file context that is the ancestor of fctx present in ancestor
77 manifest am
77 manifest am
78
78
79 Note: we used to try and stop after a given limit, however checking if that
79 Note: we used to try and stop after a given limit, however checking if that
80 limit is reached turned out to be very expensive. we are better off
80 limit is reached turned out to be very expensive. we are better off
81 disabling that feature."""
81 disabling that feature."""
82
82
83 for f in fctx.ancestors():
83 for f in fctx.ancestors():
84 path = f.path()
84 path = f.path()
85 if am.get(path, None) == f.filenode():
85 if am.get(path, None) == f.filenode():
86 return path
86 return path
87 if basemf and basemf.get(path, None) == f.filenode():
87 if basemf and basemf.get(path, None) == f.filenode():
88 return path
88 return path
89
89
90
90
91 def _dirstatecopies(repo, match=None):
91 def _dirstatecopies(repo, match=None):
92 ds = repo.dirstate
92 ds = repo.dirstate
93 c = ds.copies().copy()
93 c = ds.copies().copy()
94 for k in list(c):
94 for k in list(c):
95 if ds[k] not in b'anm' or (match and not match(k)):
95 if ds[k] not in b'anm' or (match and not match(k)):
96 del c[k]
96 del c[k]
97 return c
97 return c
98
98
99
99
100 def _computeforwardmissing(a, b, match=None):
100 def _computeforwardmissing(a, b, match=None):
101 """Computes which files are in b but not a.
101 """Computes which files are in b but not a.
102 This is its own function so extensions can easily wrap this call to see what
102 This is its own function so extensions can easily wrap this call to see what
103 files _forwardcopies is about to process.
103 files _forwardcopies is about to process.
104 """
104 """
105 ma = a.manifest()
105 ma = a.manifest()
106 mb = b.manifest()
106 mb = b.manifest()
107 return mb.filesnotin(ma, match=match)
107 return mb.filesnotin(ma, match=match)
108
108
109
109
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage implies changeset-centric copies
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
117
117
118
118
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    # changeset-centric algorithms do not trace individual filelogs at all
    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    # verbose tracing is gated on both --debug and devel.debug.copies
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    # `base` is optional; when given, _tracefile also stops on files already
    # present in base's manifest
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # shared across all traced fctx -- presumably to avoid recomputing the
    # ancestry set once per file; TODO confirm against filectx internals
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
176
176
177
177
def _revinfo_getter(repo):
    """returns a function that returns the following data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object
    """
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    # A small cache to avoid doing the work twice for merges
    #
    # In the vast majority of cases, if we ask information for a revision
    # about 1 parent, we'll later ask it for the other. So it make sense to
    # keep the information around when reaching the first parent of a merge
    # and dropping it after it was provided for the second parents.
    #
    # It exists cases were only one parent of the merge will be walked. It
    # happens when the "destination" the copy tracing is descendant from a
    # new root, not common with the "source". In that case, we will only walk
    # through merge parents that are descendant of changesets common
    # between "source" and "destination".
    #
    # With the current case implementation if such changesets have a copy
    # information, we'll keep them in memory until the end of
    # _changesetforwardcopies. We don't expect the case to be frequent
    # enough to matters.
    #
    # In addition, it would be possible to reach pathological case, were
    # many first parent are met before any second parent is reached. In
    # that case the cache could grow. If this even become an issue one can
    # safely introduce a maximum cache size. This would trade extra CPU/IO
    # time to save memory.
    merge_caches = {}

    def revinfo(rev):
        p1, p2 = parents(rev)
        value = None
        # pop: a merge's cached entry is consumed by the second lookup
        e = merge_caches.pop(rev, None)
        if e is not None:
            return e
        changes = None
        # only parse sidedata when the revision advertises copy information
        if flags(rev) & HASCOPIESINFO:
            changes = changelogrevision(rev).changes
        value = (p1, p2, changes)
        # cache only for merges (both parents non-null)
        if p1 != node.nullrev and p2 != node.nullrev:
            # XXX some case we over cache, IGNORE
            merge_caches[rev] = value
        return value

    return revinfo
234
234
235
235
def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor"""
    cache = {}

    def _is_ancestor(anc, desc):
        # a revision number is always smaller than its descendants', so
        # these two cases need neither a graph walk nor a cache entry
        if anc > desc:
            return False
        if anc == desc:
            return True
        key = (anc, desc)
        try:
            return cache[key]
        except KeyError:
            result = cache[key] = is_ancestor(anc, desc)
            return result

    return _is_ancestor
252
252
253
253
def _changesetforwardcopies(a, b, match):
    """changeset-centric version of forward-copy computation

    Aggregates the copy information recorded by the changesets between `a`
    and `b` (sidedata or "extra" based, depending on repo.filecopiesmode)
    instead of tracing individual filelogs.
    """
    # trivial cases: tracing from null or from b itself yields nothing
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev
    # revisions reachable from b but not from a: the span we must walk
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)
            # a parent outside the missing set is a walk starting point
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    # limit the walk to revisions actually on a path from a root to b
    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)

    if repo.filecopiesmode == b'changeset-sidedata':
        revinfo = _revinfo_getter(repo)
        return _combine_changeset_copies(
            revs, children, b.rev(), revinfo, match, isancestor
        )
    else:
        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )
301
301
302
302
def _combine_changeset_copies(
    revs, children, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that returns (p1, p2, changes) for a revision
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    # the Rust fast path only handles the no-matcher case
    if rustmod is not None and alwaysmatch:
        return rustmod.combine_changeset_copies(
            list(revs), children, targetrev, revinfo, isancestor
        )

    isancestor = cached_is_ancestor(isancestor)

    # {rev: {dest: (introducing_rev, source_or_None)}} for pending revisions
    all_copies = {}
    for r in revs:
        # retrieve (and drop) the copies that reached `r`; absent for roots
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, changes = revinfo(c)
            childcopies = {}
            if r == p1:
                # `r` is the first parent of child `c`
                parent = 1
                if changes is not None:
                    childcopies = changes.copied_from_p1
            else:
                assert r == p2
                parent = 2
                if changes is not None:
                    childcopies = changes.copied_from_p2
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            # `newcopies` aliases `copies` until a write forces a copy
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    # chain through to the original source when `source` was
                    # itself copied earlier (and not deleted: prev[1] not None)
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            if changes is not None and changes.removed:
                if newcopies is copies:
                    newcopies = copies.copy()
                for f in changes.removed:
                    if f in newcopies:
                        # NOTE(review): the copy-on-write above already made
                        # `newcopies is copies` False, so this inner check
                        # looks unreachable -- confirm before removing
                        if newcopies is copies:
                            # copy on write to avoid affecting potential other
                            # branches. when there are no other branches, this
                            # could be avoided.
                            newcopies = copies.copy()
                        # record the removal; (c, None) entries are dropped
                        # when building final_copies below
                        newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            elif newcopies is othercopies:
                # nothing to merge:
                pass
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    minor, major = othercopies, newcopies
                else:
                    minor, major = newcopies, othercopies
                copies = _merge_copies_dict(minor, major, isancestor, changes)
                all_copies[c] = copies

    final_copies = {}
    # keep only entries whose source survived (None marks a removal)
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
394
394
395
395
def _merge_copies_dict(minor, major, isancestor, changes):
    """merge two copies-mapping together, minor and major

    In case of conflict, value from "major" will be picked.

    - `isancestor(low_rev, high_rev)`: callable return True if `low_rev` is an
      ancestors of `high_rev`,

    - `changes`: the ChangingFiles object of the current revision (or None);
      its `salvaged` and `merged` sets refine conflict resolution.

    return the resulting dict (in practice, the "minor" object, updated)
    """
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            # no conflict: take the major entry as-is
            minor[dest] = value
        else:
            # entries are (introducing_rev, source_or_None) tuples
            new_tt = value[0]
            other_tt = other[0]
            if value[1] == other[1]:
                # same source on both sides: nothing to do
                continue
            # content from "major" wins, unless it is older
            # than the branch point or there is a merge
            if new_tt == other_tt:
                minor[dest] = value
            elif (
                changes is not None
                and value[1] is None
                and dest in changes.salvaged
            ):
                # major recorded a removal but the file was salvaged in this
                # revision: keep the minor entry
                pass
            elif (
                changes is not None
                and other[1] is None
                and dest in changes.salvaged
            ):
                # minor recorded a removal but the file was salvaged: take
                # the major entry
                minor[dest] = value
            elif changes is not None and dest in changes.merged:
                minor[dest] = value
            elif not isancestor(new_tt, other_tt):
                if value[1] is not None:
                    minor[dest] = value
            elif isancestor(other_tt, new_tt):
                minor[dest] = value
    return minor
442
442
443
443
def _revinfo_getter_extra(repo):
    """return a function that return multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            parents = fctx._filelog.parents(fctx._filenode)
            nb_parents = 0
            for n in parents:
                if n != node.nullid:
                    nb_parents += 1
            # two non-null filelog parents means the file was merged
            return nb_parents >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo
481
481
482
482
def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information"""
    # {rev: {dest: (introducing_rev, source_or_None)}} for pending revisions
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        # retrieve (and drop) the copies that reached `r`; absent for roots
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            if r == p1:
                # `r` is the first parent of child `c`
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            # `newcopies` aliases `copies` until a write forces a copy
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    # chain through to the original source when `source` was
                    # itself copied earlier (and not deleted)
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    # record the removal; (c, None) entries are dropped when
                    # building final_copies below
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                all_copies[c] = newcopies

    final_copies = {}
    # keep only entries whose source survived (None marks a removal)
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
551
551
552
552
553 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
553 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
554 """version of `_merge_copies_dict` that works with the Google
554 """version of `_merge_copies_dict` that works with the Google
555 specific "extra" based storage for copy information"""
555 specific "extra" based storage for copy information"""
556 for dest, value in major.items():
556 for dest, value in major.items():
557 other = minor.get(dest)
557 other = minor.get(dest)
558 if other is None:
558 if other is None:
559 minor[dest] = value
559 minor[dest] = value
560 else:
560 else:
561 new_tt = value[0]
561 new_tt = value[0]
562 other_tt = other[0]
562 other_tt = other[0]
563 if value[1] == other[1]:
563 if value[1] == other[1]:
564 continue
564 continue
565 # content from "major" wins, unless it is older
565 # content from "major" wins, unless it is older
566 # than the branch point or there is a merge
566 # than the branch point or there is a merge
567 if (
567 if (
568 new_tt == other_tt
568 new_tt == other_tt
569 or not isancestor(new_tt, other_tt)
569 or not isancestor(new_tt, other_tt)
570 or ismerged(dest)
570 or ismerged(dest)
571 ):
571 ):
572 minor[dest] = value
572 minor[dest] = value
573
573
574
574
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b

    `base` defaults to `a`; `match` (narrowed by the repo's narrowmatch)
    restricts which destinations are considered.
    """

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        return _committedforwardcopies(a, b, base, match)
    # working copy: trace committed copies up to the first parent, then
    # layer the uncommitted (dirstate) copies on top
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
589
589
590
590
def _backwardrenames(a, b, match):
    """find renames from ``b`` back to its ancestor ``a``

    Returns the reversed mapping of ``_forwardcopies(b, a)``, keeping only
    renames (entries whose source is gone from ``a``) and, when ``match``
    is given, only entries whose source matches it.
    """
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    renames = {}
    for dst, src in sorted(pycompat.iteritems(forward)):
        if match and not match(src):
            continue
        if src in a:
            # source still exists in `a`: this is a copy, not a rename
            continue
        renames[src] = dst
    return renames
611
611
612
612
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    Dispatches to forward, backward, or combined (backward to the common
    ancestor, then forward) copy tracing depending on how `x` and `y`
    are related.
    """
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        # trivial comparison: nothing can have been copied
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    anc = y.ancestor(x)
    if anc == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif anc == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        # walk back from x to the ancestor, then forward to y
        base = None
        if anc.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, anc, match=match),
            _forwardcopies(anc, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
649
649
650
650
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
719
719
720
720
def _isfullcopytraceable(repo, c1, base):
    """Checks that if base, source and destination are all no-public branches,
    if yes let's use the full copytrace algorithm for increased capabilities
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        # working directory: judge by its first parent instead
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        # public changesets involved: stick to heuristics
        return False
    sourcecommitlimit = repo.ui.configint(
        b'experimental', b'copytrace.sourcecommitlimit'
    )
    commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
    return commits < sourcecommitlimit
739
739
740
740
741 def _checksinglesidecopies(
741 def _checksinglesidecopies(
742 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
742 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
743 ):
743 ):
744 if src not in m2:
744 if src not in m2:
745 # deleted on side 2
745 # deleted on side 2
746 if src not in m1:
746 if src not in m1:
747 # renamed on side 1, deleted on side 2
747 # renamed on side 1, deleted on side 2
748 renamedelete[src] = dsts1
748 renamedelete[src] = dsts1
749 elif src not in mb:
749 elif src not in mb:
750 # Work around the "short-circuit to avoid issues with merge states"
750 # Work around the "short-circuit to avoid issues with merge states"
751 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
751 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
752 # destination doesn't exist in y.
752 # destination doesn't exist in y.
753 pass
753 pass
754 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
754 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
755 return
755 return
756 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
756 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
757 # modified on side 2
757 # modified on side 2
758 for dst in dsts1:
758 for dst in dsts1:
759 copy[dst] = src
759 copy[dst] = src
760
760
761
761
class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # each attribute defaults to a fresh empty dict when not supplied
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
796
796
797
797
def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        # no copies on either side: nothing to merge
        return branch_copies(), branch_copies(), {}

    # invert the dst -> src maps into src -> [dst, ...] maps so that we
    # can reason per copy source below
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    if repo.ui.debugflag:
        # build reverse-lookup sets purely for the debug annotations below
        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b" on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge
921
921
922
922
def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfiles: added files on the other side (compared to ctx)
    """
    # generate a directory move map
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        # querying hasdir() avoids materializing a full directory map
        if ctx.hasdir(dsrc) and ctx.hasdir(ddst):
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for bad in invalid:
        if bad in dirmove:
            del dirmove[bad]
    del invalid

    if not dirmove:
        return {}, {}

    # normalize to trailing-slash form so prefix tests below are exact
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfiles:
        if f in fullcopy:
            continue
        for d in dirmove:
            if f.startswith(d):
                # new file added in a directory that was moved, move it
                df = dirmove[d] + f[len(d) :]
                if df not in copy:
                    movewithdir[f] = df
                    repo.ui.debug(
                        b" pending file src: '%s' -> dst: '%s'\n"
                        % (f, df)
                    )
                break

    return dirmove, movewithdir
986
985
987
986
988 def _heuristicscopytracing(repo, c1, c2, base):
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # working-directory contexts have rev() None; use their first parent so
    # the revset and linkrev arithmetic below operate on real revisions
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    copies1 = {}
    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (loop-invariant: read the config once, not per missing file)
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1095
1094
1096
1095
1097 def _related(f1, f2):
1096 def _related(f1, f2):
1098 """return True if f1 and f2 filectx have a common ancestor
1097 """return True if f1 and f2 filectx have a common ancestor
1099
1098
1100 Walk back to common ancestor to see if the two files originate
1099 Walk back to common ancestor to see if the two files originate
1101 from the same file. Since workingfilectx's rev() is None it messes
1100 from the same file. Since workingfilectx's rev() is None it messes
1102 up the integer comparison logic, hence the pre-step check for
1101 up the integer comparison logic, hence the pre-step check for
1103 None (f1 and f2 can only be workingfilectx's initially).
1102 None (f1 and f2 can only be workingfilectx's initially).
1104 """
1103 """
1105
1104
1106 if f1 == f2:
1105 if f1 == f2:
1107 return True # a match
1106 return True # a match
1108
1107
1109 g1, g2 = f1.ancestors(), f2.ancestors()
1108 g1, g2 = f1.ancestors(), f2.ancestors()
1110 try:
1109 try:
1111 f1r, f2r = f1.linkrev(), f2.linkrev()
1110 f1r, f2r = f1.linkrev(), f2.linkrev()
1112
1111
1113 if f1r is None:
1112 if f1r is None:
1114 f1 = next(g1)
1113 f1 = next(g1)
1115 if f2r is None:
1114 if f2r is None:
1116 f2 = next(g2)
1115 f2 = next(g2)
1117
1116
1118 while True:
1117 while True:
1119 f1r, f2r = f1.linkrev(), f2.linkrev()
1118 f1r, f2r = f1.linkrev(), f2.linkrev()
1120 if f1r > f2r:
1119 if f1r > f2r:
1121 f1 = next(g1)
1120 f1 = next(g1)
1122 elif f2r > f1r:
1121 elif f2r > f1r:
1123 f2 = next(g2)
1122 f2 = next(g2)
1124 else: # f1 and f2 point to files in the same linkrev
1123 else: # f1 and f2 point to files in the same linkrev
1125 return f1 == f2 # true if they point to the same file
1124 return f1 == f2 # true if they point to the same file
1126 except StopIteration:
1125 except StopIteration:
1127 return False
1126 return False
1128
1127
1129
1128
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    copies_to_apply = pathcopies(base, ctx)
    # discard copies that are no longer valid relative to the working copy
    _filter(wctx.p1(), wctx, copies_to_apply)
    for dest, source in pycompat.iteritems(copies_to_apply):
        wctx[dest].markcopied(source)
General Comments 0
You need to be logged in to leave comments. Login now