copies: avoid filtering by short-circuit dirstate-only copies earlier...
Martin von Zweigbergk
r44749:30862e22 default
@@ -1,1169 +1,1171 @@
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import multiprocessing
import os

from .i18n import _


from .revlogutils.flagutil import REVIDX_SIDEDATA

from . import (
    error,
    match as matchmod,
    node,
    pathutil,
    pycompat,
    util,
)

from .revlogutils import sidedata as sidedatamod

from .utils import stringutil

def _filter(src, dst, t):
    """filters out invalid copies after chaining"""

    # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
    # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
    # in the following table (not including trivial cases). For example, case 2
    # is where a file existed in 'src' and remained under that name in 'mid' and
    # then was renamed between 'mid' and 'dst'.
    #
    # case src mid dst result
    #   1   x   y   -    -
    #   2   x   y   y   x->y
    #   3   x   y   x    -
    #   4   x   y   z   x->z
    #   5   -   x   y    -
    #   6   x   x   y   x->y
    #
    # _chain() takes care of chaining the copies in 'a' and 'b', but it
    # cannot tell the difference between cases 1 and 2, between 3 and 4, or
    # between 5 and 6, so it includes all cases in its result.
    # Cases 1, 3, and 5 are then removed by _filter().

    for k, v in list(t.items()):
        # remove copies from files that didn't exist
        if v not in src:
            del t[k]
        # remove criss-crossed copies
        elif k in src and v in dst:
            del t[k]
        # remove copies to files that were then removed
        elif k not in dst:
            del t[k]

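# Illustrative sketch (hypothetical data, not part of the original file):
# with a chained map t = {b'c': b'a', b'b': b'a'}, where b'a' exists in
# src and only b'c' exists in dst, _filter(src, dst, t) drops the entry
# for b'b' (its destination no longer exists in dst, case 1) and keeps
# {b'c': b'a'} (a plain rename, case 4).
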
def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    result = prefix.copy()
    for key, value in pycompat.iteritems(suffix):
        result[key] = prefix.get(value, value)
    return result

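# Illustrative sketch (hypothetical data): chaining a rename a->b with a
# later rename b->c resolves transitively to the original source:
#
#   _chain({b'b': b'a'}, {b'c': b'b'}) == {b'b': b'a', b'c': b'a'}
#
# Stale entries such as b'b' here are left in place; _filter() is
# responsible for pruning them afterwards.
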
def _tracefile(fctx, am, basemf):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am

    Note: we used to try to stop after a given limit, but checking if that
    limit was reached turned out to be very expensive. We are better off
    disabling that feature."""

    for f in fctx.ancestors():
        path = f.path()
        if am.get(path, None) == f.filenode():
            return path
        if basemf and basemf.get(path, None) == f.filenode():
            return path

def _dirstatecopies(repo, match=None):
    ds = repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in b'anm' or (match and not match(k)):
            del c[k]
    return c

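# Illustrative sketch (hypothetical repo state): after "hg cp a b" the
# dirstate typically records {b'b': b'a'} with b'b' in state b'a' (added),
# so the copy is kept; a destination in state b'r' (removed), or one
# rejected by the matcher, is dropped from the returned mapping.
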
def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)

def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource

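# Example configuration (a sketch; values taken from the checks above)
# that makes usechangesetcentricalgo() return True even without the
# b'changeset-sidedata' file-copies mode:
#
#   [experimental]
#   copies.read-from = compatibility
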
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
            if debug:
                dbg(
                    b'debug.copies: time: %f seconds\n'
                    % (util.timer() - start)
                )
    return cm

def _revinfogetter(repo):
    """return a function that returns multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    """
    cl = repo.changelog
    parents = cl.parentrevs

    if repo.filecopiesmode == b'changeset-sidedata':
        changelogrevision = cl.changelogrevision
        flags = cl.flags

        # A small cache to avoid doing the work twice for merges
        #
        # In the vast majority of cases, if we ask information for a revision
        # about 1 parent, we'll later ask it for the other. So it makes sense
        # to keep the information around when reaching the first parent of a
        # merge and dropping it after it was provided for the second parent.
        #
        # There are cases where only one parent of the merge will be walked.
        # This happens when the "destination" of the copy tracing is a
        # descendant of a new root, not common with the "source". In that
        # case, we will only walk through merge parents that are descendants
        # of changesets common between "source" and "destination".
        #
        # With the current implementation, if such changesets have copy
        # information, we'll keep them in memory until the end of
        # _changesetforwardcopies. We don't expect the case to be frequent
        # enough to matter.
        #
        # In addition, it would be possible to reach pathological cases where
        # many first parents are met before any second parent is reached. In
        # that case the cache could grow. If this ever becomes an issue one can
        # safely introduce a maximum cache size. This would trade extra CPU/IO
        # time to save memory.
        merge_caches = {}

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & REVIDX_SIDEDATA:
                e = merge_caches.pop(rev, None)
                if e is not None:
                    return e
                c = changelogrevision(rev)
                p1copies = c.p1copies
                p2copies = c.p2copies
                removed = c.filesremoved
                if p1 != node.nullrev and p2 != node.nullrev:
                    # XXX in some cases we over-cache, IGNORE
                    merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
            else:
                p1copies = {}
                p2copies = {}
                removed = []
            return p1, p2, p1copies, p2copies, removed

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            ctx = repo[rev]
            p1copies, p2copies = ctx._copies
            removed = ctx.filesremoved()
            return p1, p2, p1copies, p2copies, removed

    return revinfo

def _changesetforwardcopies(a, b, match):
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)
    return _combinechangesetcopies(revs, children, b.rev(), revinfo, match)

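# Illustrative sketch (hypothetical revision numbers): tracing from a = 0
# to b = 3 where revisions 2 and 3 both have parent 1, and 1's parent is
# 0, gives missingrevs == {1, 2, 3}, children == {0: [1], 1: [2, 3]} and
# roots == {0}, since rev 0 is the only parent outside the missing set.
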
def _combinechangesetcopies(revs, children, targetrev, revinfo, match):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that returns (p1, p2, p1copies, p2copies, removed)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
                # _chain makes a copy, which we could avoid in some
                # simple/linear cases.
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 takes precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset-based copies. It was made without regard to
                # potential filelog-related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    return all_copies[targetrev]

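# Worked sketch (hypothetical inputs): with revs == [0, 1], children ==
# {0: [1], 1: [2]}, targetrev == 2, an always-matching matcher, and
# revinfo() reporting no copies for rev 1 and p1copies == {b'b': b'a'}
# for rev 2, the walk produces all_copies == {2: {b'b': b'a'}} and the
# function returns {b'b': b'a'}.
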
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies

def _backwardrenames(a, b, match):
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r

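# Illustrative sketch (hypothetical data): if walking forward from b to a
# found f == {b'new': b'old'} and b'old' no longer exists in a, the loop
# reverses it into r == {b'old': b'new'}; had b'old' survived in a (a copy
# rather than a rename), the entry would have been skipped.
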
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
+    if y.rev() is None and x == y.p1():
+        if debug:
+            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
+        # short-circuit to avoid issues with merge states
+        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
-        if y.rev() is None and x == y.p1():
-            # short-circuit to avoid issues with merge states
-            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies

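# Usage sketch (hypothetical contexts): right after "hg cp a b",
# pathcopies(repo[b'.'], repo[None]) matches the dirstate short-circuit
# above (y is the working context, x its first parent) and returns
# {b'b': b'a'} without computing y.ancestor(x).
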
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc. to merge
    files that were moved or copied in one merge parent and modified in
    another. For example:

    o       ---> 4 another commit
    |
    |   o   ---> 3 commit that modifies a.txt
    |  /
    o /     ---> 2 commit that moves a.txt to b.txt
    |/
    o       ---> 1 merge base

    If we try to rebase revision 3 onto revision 4 and the user has copytrace
    disabled, then, since there is no a.txt in revision 4, we print the
    following message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" is an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on in such
        # cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)

def _isfullcopytraceable(repo, c1, base):
    """ Checks whether base, source and destination are all non-public
    branches; if yes, we can use the full copytrace algorithm for increased
    capabilities, since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit on
    the number of changesets from c1 to base; if the number of changesets is
    more than the limit, the full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint(
            b'experimental', b'copytrace.sourcecommitlimit'
        )
        commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False

def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif src not in mb:
        # Work around the "short-circuit to avoid issues with merge states"
        # thing in pathcopies(): pathcopies(x, y) can return a copy where the
        # destination doesn't exist in y.
        pass
    elif m2[src] != mb[src]:
        if not _related(c2[src], base[src]):
            return
        # modified on side 2
        for dst in dsts1:
            copy[dst] = src

class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source, present in one context but not the other,
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        self.copy = {} if copy is None else copy
        self.renamedelete = {} if renamedelete is None else renamedelete
        self.dirmove = {} if dirmove is None else dirmove
        self.movewithdir = {} if movewithdir is None else movewithdir

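# Illustrative sketch (hypothetical values): a side that renamed b'a' to
# b'b' and moved everything from b'dir/' to b'dir2/' could be described as
#
#   branch_copies(
#       copy={b'b': b'a'},
#       dirmove={b'dir/': b'dir2/'},
#       movewithdir={b'dir/new': b'dir2/new'},
#   )
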
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return branch_copies(), branch_copies(), {}

    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a', the common destination 'c' means
                # the rename is not treated as divergent.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    if repo.ui.debugflag:
        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b"   on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b"    src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge

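# Illustrative sketch (hypothetical data): if pathcopies(base, c1) returned
# copies1 == {b'b': b'a', b'c': b'a'}, the inversion above yields
# inversecopies1 == {b'a': [b'b', b'c']}; if side 2 copied b'a' to b'c' and
# b'd' and both sides deleted b'a', the shared destination b'c' is recorded
# in copy1/copy2 and the rename is not reported as divergent.
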
def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfiles: added files on the other side (compared to ctx)
    """
    # generate a directory move map
    d = ctx.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d and ddst in d:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d, invalid

    if not dirmove:
        return {}, {}

    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfiles:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                        break

    return dirmove, movewithdir

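# Illustrative sketch (hypothetical paths): if every file under b'old/' was
# moved to b'new/' on ctx's side, the scan yields dirmove ==
# {b'old/': b'new/'}; a file b'old/f' added on the other side then gets
# scheduled to follow the directory: movewithdir == {b'old/f': b'new/f'}.
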
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of the following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    The source branch is the commits from base up to c2, not including base.

    If a merge is involved it falls back to _fullcopytracing().

    Can be used by setting the following config:

    [experimental]
    copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    copies1 = {}
    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}

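# Illustrative sketch (hypothetical paths): if b'dir1/foo.py' is present in
# base and c2 but missing from c1, while c1 newly contains b'dir2/foo.py',
# the shared basename b'foo.py' makes b'dir2/foo.py' a move candidate; it
# is recorded in copies1 only if _related() finds a common file ancestor.
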
870 def _related(f1, f2):
872 def _related(f1, f2):
871 """return True if f1 and f2 filectx have a common ancestor
873 """return True if f1 and f2 filectx have a common ancestor
872
874
873 Walk back to common ancestor to see if the two files originate
875 Walk back to common ancestor to see if the two files originate
874 from the same file. Since workingfilectx's rev() is None it messes
876 from the same file. Since workingfilectx's rev() is None it messes
875 up the integer comparison logic, hence the pre-step check for
877 up the integer comparison logic, hence the pre-step check for
876 None (f1 and f2 can only be workingfilectx's initially).
878 None (f1 and f2 can only be workingfilectx's initially).
877 """
879 """
878
880
879 if f1 == f2:
881 if f1 == f2:
880 return True # a match
882 return True # a match
881
883
882 g1, g2 = f1.ancestors(), f2.ancestors()
884 g1, g2 = f1.ancestors(), f2.ancestors()
883 try:
885 try:
884 f1r, f2r = f1.linkrev(), f2.linkrev()
886 f1r, f2r = f1.linkrev(), f2.linkrev()
885
887
886 if f1r is None:
888 if f1r is None:
887 f1 = next(g1)
889 f1 = next(g1)
888 if f2r is None:
890 if f2r is None:
889 f2 = next(g2)
891 f2 = next(g2)
890
892
891 while True:
893 while True:
892 f1r, f2r = f1.linkrev(), f2.linkrev()
894 f1r, f2r = f1.linkrev(), f2.linkrev()
893 if f1r > f2r:
895 if f1r > f2r:
894 f1 = next(g1)
896 f1 = next(g1)
895 elif f2r > f1r:
897 elif f2r > f1r:
896 f2 = next(g2)
898 f2 = next(g2)
897 else: # f1 and f2 point to files in the same linkrev
899 else: # f1 and f2 point to files in the same linkrev
898 return f1 == f2 # true if they point to the same file
900 return f1 == f2 # true if they point to the same file
899 except StopIteration:
901 except StopIteration:
900 return False
902 return False
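# A minimal sketch of the same two-pointer descent with plain integers in
# place of linkrevs (hypothetical data, not the Mercurial API): each list is
# a file's ancestor linkrevs in decreasing order; we repeatedly advance the
# side with the higher value until both sides meet or one runs out. The real
# code additionally checks f1 == f2 once the linkrevs match.
def _descend_to_common(revs1, revs2):
    g1, g2 = iter(revs1), iter(revs2)
    try:
        r1, r2 = next(g1), next(g2)
        while True:
            if r1 > r2:
                r1 = next(g1)
            elif r2 > r1:
                r2 = next(g2)
            else:
                return True  # both walks reached the same linkrev
    except StopIteration:
        return False

# _descend_to_common([10, 7, 3], [8, 3]) -> True (both descend to 3)
# _descend_to_common([10, 7], [8, 4]) -> False (one side is exhausted)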
901
903
902
904
903 def graftcopies(wctx, ctx, base):
905 def graftcopies(wctx, ctx, base):
904 """reproduce copies between base and ctx in the wctx
906 """reproduce copies between base and ctx in the wctx
905
907
906 Unlike mergecopies(), this function will only consider copies between base
908 Unlike mergecopies(), this function will only consider copies between base
907 and ctx; it will ignore copies between base and wctx. Also unlike
909 and ctx; it will ignore copies between base and wctx. Also unlike
908 mergecopies(), this function will apply copies to the working copy (instead
910 mergecopies(), this function will apply copies to the working copy (instead
909 of just returning information about the copies). That makes it cheaper
911 of just returning information about the copies). That makes it cheaper
910 (especially in the common case of base==ctx.p1()) and useful also when
912 (especially in the common case of base==ctx.p1()) and useful also when
911 experimental.copytrace=off.
913 experimental.copytrace=off.
912
914
913 merge.update() will have already marked most copies, but it will only
915 merge.update() will have already marked most copies, but it will only
914 mark copies if it thinks the source files are related (see
916 mark copies if it thinks the source files are related (see
915 merge._related()). It will also not mark copies if the file wasn't modified
917 merge._related()). It will also not mark copies if the file wasn't modified
916 on the local side. This function adds the copies that were "missed"
918 on the local side. This function adds the copies that were "missed"
917 by merge.update().
919 by merge.update().
918 """
920 """
919 new_copies = pathcopies(base, ctx)
921 new_copies = pathcopies(base, ctx)
920 _filter(wctx.p1(), wctx, new_copies)
922 _filter(wctx.p1(), wctx, new_copies)
921 for dst, src in pycompat.iteritems(new_copies):
923 for dst, src in pycompat.iteritems(new_copies):
922 wctx[dst].markcopied(src)
924 wctx[dst].markcopied(src)
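# Typical (hypothetical) call site, matching the docstring above: wctx is
# the working context repo[None], ctx is the changeset being grafted, and
# base is usually ctx.p1():
#
#   graftcopies(repo[None], repo[rev], repo[rev].p1())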
923
925
924
926
925 def computechangesetfilesadded(ctx):
927 def computechangesetfilesadded(ctx):
926 """return the list of files added in a changeset
928 """return the list of files added in a changeset
927 """
929 """
928 added = []
930 added = []
929 for f in ctx.files():
931 for f in ctx.files():
930 if not any(f in p for p in ctx.parents()):
932 if not any(f in p for p in ctx.parents()):
931 added.append(f)
933 added.append(f)
932 return added
934 return added
933
935
934
936
935 def computechangesetfilesremoved(ctx):
937 def computechangesetfilesremoved(ctx):
936 """return the list of files removed in a changeset
938 """return the list of files removed in a changeset
937 """
939 """
938 removed = []
940 removed = []
939 for f in ctx.files():
941 for f in ctx.files():
940 if f not in ctx:
942 if f not in ctx:
941 removed.append(f)
943 removed.append(f)
942 return removed
944 return removed
943
945
944
946
945 def computechangesetcopies(ctx):
947 def computechangesetcopies(ctx):
946 """return the copies data for a changeset
948 """return the copies data for a changeset
947
949
948 The copies data are returned as a pair of dictionaries (p1copies, p2copies).
950 The copies data are returned as a pair of dictionaries (p1copies, p2copies).
949 
951 
950 Each dictionary is of the form `{newname: oldname}`.
952 Each dictionary is of the form `{newname: oldname}`.
951 """
953 """
952 p1copies = {}
954 p1copies = {}
953 p2copies = {}
955 p2copies = {}
954 p1 = ctx.p1()
956 p1 = ctx.p1()
955 p2 = ctx.p2()
957 p2 = ctx.p2()
956 narrowmatch = ctx._repo.narrowmatch()
958 narrowmatch = ctx._repo.narrowmatch()
957 for dst in ctx.files():
959 for dst in ctx.files():
958 if not narrowmatch(dst) or dst not in ctx:
960 if not narrowmatch(dst) or dst not in ctx:
959 continue
961 continue
960 copied = ctx[dst].renamed()
962 copied = ctx[dst].renamed()
961 if not copied:
963 if not copied:
962 continue
964 continue
963 src, srcnode = copied
965 src, srcnode = copied
964 if src in p1 and p1[src].filenode() == srcnode:
966 if src in p1 and p1[src].filenode() == srcnode:
965 p1copies[dst] = src
967 p1copies[dst] = src
966 elif src in p2 and p2[src].filenode() == srcnode:
968 elif src in p2 and p2[src].filenode() == srcnode:
967 p2copies[dst] = src
969 p2copies[dst] = src
968 return p1copies, p2copies
970 return p1copies, p2copies
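# Sketch of the returned shape for a hypothetical changeset whose only
# change is a rename recorded against p1:
#
#   p1copies, p2copies = computechangesetcopies(ctx)
#   p1copies -> {b'new.txt': b'old.txt'}
#   p2copies -> {}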
969
971
970
972
971 def encodecopies(files, copies):
973 def encodecopies(files, copies):
972 items = []
974 items = []
973 for i, dst in enumerate(files):
975 for i, dst in enumerate(files):
974 if dst in copies:
976 if dst in copies:
975 items.append(b'%d\0%s' % (i, copies[dst]))
977 items.append(b'%d\0%s' % (i, copies[dst]))
976 if len(items) != len(copies):
978 if len(items) != len(copies):
977 raise error.ProgrammingError(
979 raise error.ProgrammingError(
978 b'some copy targets missing from file list'
980 b'some copy targets missing from file list'
979 )
981 )
980 return b"\n".join(items)
982 return b"\n".join(items)
981
983
982
984
983 def decodecopies(files, data):
985 def decodecopies(files, data):
984 try:
986 try:
985 copies = {}
987 copies = {}
986 if not data:
988 if not data:
987 return copies
989 return copies
988 for l in data.split(b'\n'):
990 for l in data.split(b'\n'):
989 strindex, src = l.split(b'\0')
991 strindex, src = l.split(b'\0')
990 i = int(strindex)
992 i = int(strindex)
991 dst = files[i]
993 dst = files[i]
992 copies[dst] = src
994 copies[dst] = src
993 return copies
995 return copies
994 except (ValueError, IndexError):
996 except (ValueError, IndexError):
995 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
997 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
996 # used different syntax for the value.
998 # used different syntax for the value.
997 return None
999 return None
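# A minimal round-trip sketch for the copy encoding above (hypothetical
# data): each copy is stored as "<index of dst in files>\0<src>", joined by
# newlines, so destination names never need to be repeated.
#
#   files = [b'dir/new.txt', b'other.txt']  # sorted files of the changeset
#   copies = {b'dir/new.txt': b'dir/old.txt'}
#   data = encodecopies(files, copies)      # b'0\x00dir/old.txt'
#   decodecopies(files, data) == copies     # True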
998
1000
999
1001
1000 def encodefileindices(files, subset):
1002 def encodefileindices(files, subset):
1001 subset = set(subset)
1003 subset = set(subset)
1002 indices = []
1004 indices = []
1003 for i, f in enumerate(files):
1005 for i, f in enumerate(files):
1004 if f in subset:
1006 if f in subset:
1005 indices.append(b'%d' % i)
1007 indices.append(b'%d' % i)
1006 return b'\n'.join(indices)
1008 return b'\n'.join(indices)
1007
1009
1008
1010
1009 def decodefileindices(files, data):
1011 def decodefileindices(files, data):
1010 try:
1012 try:
1011 subset = []
1013 subset = []
1012 if not data:
1014 if not data:
1013 return subset
1015 return subset
1014 for strindex in data.split(b'\n'):
1016 for strindex in data.split(b'\n'):
1015 i = int(strindex)
1017 i = int(strindex)
1016 if i < 0 or i >= len(files):
1018 if i < 0 or i >= len(files):
1017 return None
1019 return None
1018 subset.append(files[i])
1020 subset.append(files[i])
1019 return subset
1021 return subset
1020 except (ValueError, IndexError):
1022 except (ValueError, IndexError):
1021 # Perhaps someone had chosen the same key name (e.g. "added") and
1023 # Perhaps someone had chosen the same key name (e.g. "added") and
1022 # used different syntax for the value.
1024 # used different syntax for the value.
1023 return None
1025 return None
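# The same idea for file indices (hypothetical data): the subset is stored
# as newline-separated indices into the sorted file list, and decoding
# rejects out-of-range indices by returning None.
#
#   files = [b'a', b'b', b'c']
#   encodefileindices(files, [b'a', b'c'])  # b'0\n2'
#   decodefileindices(files, b'0\n2')       # [b'a', b'c']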
1024
1026
1025
1027
1026 def _getsidedata(srcrepo, rev):
1028 def _getsidedata(srcrepo, rev):
1027 ctx = srcrepo[rev]
1029 ctx = srcrepo[rev]
1028 filescopies = computechangesetcopies(ctx)
1030 filescopies = computechangesetcopies(ctx)
1029 filesadded = computechangesetfilesadded(ctx)
1031 filesadded = computechangesetfilesadded(ctx)
1030 filesremoved = computechangesetfilesremoved(ctx)
1032 filesremoved = computechangesetfilesremoved(ctx)
1031 sidedata = {}
1033 sidedata = {}
1032 if any([filescopies, filesadded, filesremoved]):
1034 if any([filescopies, filesadded, filesremoved]):
1033 sortedfiles = sorted(ctx.files())
1035 sortedfiles = sorted(ctx.files())
1034 p1copies, p2copies = filescopies
1036 p1copies, p2copies = filescopies
1035 p1copies = encodecopies(sortedfiles, p1copies)
1037 p1copies = encodecopies(sortedfiles, p1copies)
1036 p2copies = encodecopies(sortedfiles, p2copies)
1038 p2copies = encodecopies(sortedfiles, p2copies)
1037 filesadded = encodefileindices(sortedfiles, filesadded)
1039 filesadded = encodefileindices(sortedfiles, filesadded)
1038 filesremoved = encodefileindices(sortedfiles, filesremoved)
1040 filesremoved = encodefileindices(sortedfiles, filesremoved)
1039 if p1copies:
1041 if p1copies:
1040 sidedata[sidedatamod.SD_P1COPIES] = p1copies
1042 sidedata[sidedatamod.SD_P1COPIES] = p1copies
1041 if p2copies:
1043 if p2copies:
1042 sidedata[sidedatamod.SD_P2COPIES] = p2copies
1044 sidedata[sidedatamod.SD_P2COPIES] = p2copies
1043 if filesadded:
1045 if filesadded:
1044 sidedata[sidedatamod.SD_FILESADDED] = filesadded
1046 sidedata[sidedatamod.SD_FILESADDED] = filesadded
1045 if filesremoved:
1047 if filesremoved:
1046 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
1048 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
1047 return sidedata
1049 return sidedata
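# Example of the resulting sidedata mapping for a hypothetical changeset
# renaming old.txt to new.txt (sorted files: [b'new.txt', b'old.txt']); the
# keys are shown by name but are numeric block identifiers in sidedatamod:
#
#   {sidedatamod.SD_P1COPIES: b'0\x00old.txt',  # new.txt copied from old.txt
#    sidedatamod.SD_FILESADDED: b'0',           # index of new.txt
#    sidedatamod.SD_FILESREMOVED: b'1'}         # index of old.txt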
1048
1050
1049
1051
1050 def getsidedataadder(srcrepo, destrepo):
1052 def getsidedataadder(srcrepo, destrepo):
1051 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
1053 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
1052 if pycompat.iswindows or not use_w:
1054 if pycompat.iswindows or not use_w:
1053 return _get_simple_sidedata_adder(srcrepo, destrepo)
1055 return _get_simple_sidedata_adder(srcrepo, destrepo)
1054 else:
1056 else:
1055 return _get_worker_sidedata_adder(srcrepo, destrepo)
1057 return _get_worker_sidedata_adder(srcrepo, destrepo)
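# Note: on Windows the simple adder is always used, regardless of the
# experimental.worker.repository-upgrade setting; the worker variant below
# relies on multiprocessing behaviour exercised only on fork-capable
# platforms (the config gate is explicit above; the rationale is an
# inference, not stated in this module).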
1056
1058
1057
1059
1058 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
1060 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
1059 """The function used by worker precomputing sidedata
1061 """The function used by worker precomputing sidedata
1060
1062
1061 It read an input queue containing revision numbers
1063 It read an input queue containing revision numbers
1062 It write in an output queue containing (rev, <sidedata-map>)
1064 It write in an output queue containing (rev, <sidedata-map>)
1063
1065
1064 The `None` input value is used as a stop signal.
1066 The `None` input value is used as a stop signal.
1065
1067
1066 The `tokens` semaphore is user to avoid having too many unprocessed
1068 The `tokens` semaphore is user to avoid having too many unprocessed
1067 entries. The workers needs to acquire one token before fetching a task.
1069 entries. The workers needs to acquire one token before fetching a task.
1068 They will be released by the consumer of the produced data.
1070 They will be released by the consumer of the produced data.
1069 """
1071 """
1070 tokens.acquire()
1072 tokens.acquire()
1071 rev = revs_queue.get()
1073 rev = revs_queue.get()
1072 while rev is not None:
1074 while rev is not None:
1073 data = _getsidedata(srcrepo, rev)
1075 data = _getsidedata(srcrepo, rev)
1074 sidedata_queue.put((rev, data))
1076 sidedata_queue.put((rev, data))
1075 tokens.acquire()
1077 tokens.acquire()
1076 rev = revs_queue.get()
1078 rev = revs_queue.get()
1077 # processing of `None` is completed, release the token.
1079 # processing of `None` is completed, release the token.
1078 tokens.release()
1080 tokens.release()
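# Rough invariant of the token protocol above (a reading aid, not new
# behaviour): a worker must acquire a token before producing a result, and
# the consumer releases one token per result it consumes, so at most
# nbworkers * BUFF_PER_WORKER results are ever buffered unconsumed.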
1079
1081
1080
1082
1081 BUFF_PER_WORKER = 50
1083 BUFF_PER_WORKER = 50
1082
1084
1083
1085
1084 def _get_worker_sidedata_adder(srcrepo, destrepo):
1086 def _get_worker_sidedata_adder(srcrepo, destrepo):
1085 """The parallel version of the sidedata computation
1087 """The parallel version of the sidedata computation
1086
1088
1087 This code spawns a pool of workers that precompute a buffer of sidedata
1089 This code spawns a pool of workers that precompute a buffer of sidedata
1088 before we actually need it"""
1090 before we actually need it"""
1089 # avoid circular import copies -> scmutil -> worker -> copies
1091 # avoid circular import copies -> scmutil -> worker -> copies
1090 from . import worker
1092 from . import worker
1091
1093
1092 nbworkers = worker._numworkers(srcrepo.ui)
1094 nbworkers = worker._numworkers(srcrepo.ui)
1093
1095
1094 tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
1096 tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
1095 revsq = multiprocessing.Queue()
1097 revsq = multiprocessing.Queue()
1096 sidedataq = multiprocessing.Queue()
1098 sidedataq = multiprocessing.Queue()
1097
1099
1098 assert srcrepo.filtername is None
1100 assert srcrepo.filtername is None
1099 # queue all tasks beforehand; revision numbers are small and it makes
1101 # queue all tasks beforehand; revision numbers are small and it makes
1100 # synchronisation simpler
1102 # synchronisation simpler
1101 #
1103 #
1102 # Since the computation for each node can be quite expensive, the overhead
1104 # Since the computation for each node can be quite expensive, the overhead
1103 # of using a single queue is not relevant. In practice, most computations
1105 # of using a single queue is not relevant. In practice, most computations
1104 # are fast but some are very expensive and dominate all the other smaller
1106 # are fast but some are very expensive and dominate all the other smaller
1105 # costs.
1107 # costs.
1106 for r in srcrepo.changelog.revs():
1108 for r in srcrepo.changelog.revs():
1107 revsq.put(r)
1109 revsq.put(r)
1108 # queue the "no more tasks" markers
1110 # queue the "no more tasks" markers
1109 for i in range(nbworkers):
1111 for i in range(nbworkers):
1110 revsq.put(None)
1112 revsq.put(None)
1111
1113
1112 allworkers = []
1114 allworkers = []
1113 for i in range(nbworkers):
1115 for i in range(nbworkers):
1114 args = (srcrepo, revsq, sidedataq, tokens)
1116 args = (srcrepo, revsq, sidedataq, tokens)
1115 w = multiprocessing.Process(target=_sidedata_worker, args=args)
1117 w = multiprocessing.Process(target=_sidedata_worker, args=args)
1116 allworkers.append(w)
1118 allworkers.append(w)
1117 w.start()
1119 w.start()
1118
1120
1119 # dictionary to store results for revisions higher than the one we are
1121 # dictionary to store results for revisions higher than the one we are
1120 # looking for. For example, if we need the sidedata map for 42, and 43 is
1122 # looking for. For example, if we need the sidedata map for 42, and 43 is
1121 # received, we shelve 43 for later use.
1123 # received, we shelve 43 for later use.
1122 staging = {}
1124 staging = {}
1123
1125
1124 def sidedata_companion(revlog, rev):
1126 def sidedata_companion(revlog, rev):
1125 sidedata = {}
1127 sidedata = {}
1126 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
1128 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
1127 # Is the data previously shelved?
1129 # Is the data previously shelved?
1128 sidedata = staging.pop(rev, None)
1130 sidedata = staging.pop(rev, None)
1129 if sidedata is None:
1131 if sidedata is None:
1130 # look at the queued results until we find the one we are looking
1132 # look at the queued results until we find the one we are looking
1131 # for (shelve the other ones)
1133 # for (shelve the other ones)
1132 r, sidedata = sidedataq.get()
1134 r, sidedata = sidedataq.get()
1133 while r != rev:
1135 while r != rev:
1134 staging[r] = sidedata
1136 staging[r] = sidedata
1135 r, sidedata = sidedataq.get()
1137 r, sidedata = sidedataq.get()
1136 tokens.release()
1138 tokens.release()
1137 return False, (), sidedata
1139 return False, (), sidedata
1138
1140
1139 return sidedata_companion
1141 return sidedata_companion
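# A minimal, self-contained sketch of the shelving logic above (hypothetical
# helper, not part of this module): results arrive out of order; anything
# that is not the wanted revision is parked in a staging dict until its turn.
def _drain_in_order(results, wanted_order):
    queue = collections.deque(results)  # stand-in for sidedataq
    staging = {}
    for wanted in wanted_order:
        data = staging.pop(wanted, None)
        while data is None:  # assumes real payloads are never None
            r, d = queue.popleft()
            if r == wanted:
                data = d
            else:
                staging[r] = d
        yield wanted, data

# list(_drain_in_order([(43, b'x'), (42, b'y')], [42, 43]))
# -> [(42, b'y'), (43, b'x')]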
1140
1142
1141
1143
1142 def _get_simple_sidedata_adder(srcrepo, destrepo):
1144 def _get_simple_sidedata_adder(srcrepo, destrepo):
1143 """The simple version of the sidedata computation
1145 """The simple version of the sidedata computation
1144
1146
1145 It just computes it in the same thread on request"""
1147 It just computes it in the same thread on request"""
1146
1148
1147 def sidedatacompanion(revlog, rev):
1149 def sidedatacompanion(revlog, rev):
1148 sidedata = {}
1150 sidedata = {}
1149 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
1151 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
1150 sidedata = _getsidedata(srcrepo, rev)
1152 sidedata = _getsidedata(srcrepo, rev)
1151 return False, (), sidedata
1153 return False, (), sidedata
1152
1154
1153 return sidedatacompanion
1155 return sidedatacompanion
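# Reading aid (an inference from the three companions in this module, not a
# documented contract): a sidedata companion returns a 3-tuple of
# (<skip this rev?>, <sidedata keys to drop>, <sidedata to add>); the adders
# above never skip or drop, while the remover below never adds.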
1154
1156
1155
1157
1156 def getsidedataremover(srcrepo, destrepo):
1158 def getsidedataremover(srcrepo, destrepo):
1157 def sidedatacompanion(revlog, rev):
1159 def sidedatacompanion(revlog, rev):
1158 f = ()
1160 f = ()
1159 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
1161 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
1160 if revlog.flags(rev) & REVIDX_SIDEDATA:
1162 if revlog.flags(rev) & REVIDX_SIDEDATA:
1161 f = (
1163 f = (
1162 sidedatamod.SD_P1COPIES,
1164 sidedatamod.SD_P1COPIES,
1163 sidedatamod.SD_P2COPIES,
1165 sidedatamod.SD_P2COPIES,
1164 sidedatamod.SD_FILESADDED,
1166 sidedatamod.SD_FILESADDED,
1165 sidedatamod.SD_FILESREMOVED,
1167 sidedatamod.SD_FILESREMOVED,
1166 )
1168 )
1167 return False, f, {}
1169 return False, f, {}
1168
1170
1169 return sidedatacompanion
1171 return sidedatacompanion
@@ -1,153 +1,152 b''
1 ===================================
1 ===================================
2 Test repository filtering avoidance
2 Test repository filtering avoidance
3 ===================================
3 ===================================
4
4
5 This test file is a bit special, as it does not check features but performance-related internal code paths.
5 This test file is a bit special, as it does not check features but performance-related internal code paths.
6
6
7 Right now, filtering a repository comes with a cost that might be significant.
7 Right now, filtering a repository comes with a cost that might be significant.
8 Until this gets better, there are various operations that try hard not to trigger
8 Until this gets better, there are various operations that try hard not to trigger
9 a filtering computation. This test file makes sure we don't reintroduce code that triggers the filtering for these operations:
9 a filtering computation. This test file makes sure we don't reintroduce code that triggers the filtering for these operations:
10
10
11 Setup
11 Setup
12 -----
12 -----
13 $ hg init test-repo
13 $ hg init test-repo
14 $ cd test-repo
14 $ cd test-repo
15 $ echo "some line" > z
15 $ echo "some line" > z
16 $ echo a > a
16 $ echo a > a
17 $ hg commit -Am a
17 $ hg commit -Am a
18 adding a
18 adding a
19 adding z
19 adding z
20 $ echo "in a" >> z
20 $ echo "in a" >> z
21 $ echo b > b
21 $ echo b > b
22 $ hg commit -Am b
22 $ hg commit -Am b
23 adding b
23 adding b
24 $ echo "file" >> z
24 $ echo "file" >> z
25 $ echo c > c
25 $ echo c > c
26 $ hg commit -Am c
26 $ hg commit -Am c
27 adding c
27 adding c
28 $ hg rm a
28 $ hg rm a
29 $ echo c1 > c
29 $ echo c1 > c
30 $ hg add c
30 $ hg add c
31 c already tracked!
31 c already tracked!
32 $ echo d > d
32 $ echo d > d
33 $ hg add d
33 $ hg add d
34 $ rm b
34 $ rm b
35
35
36 $ cat << EOF >> $HGRCPATH
36 $ cat << EOF >> $HGRCPATH
37 > [devel]
37 > [devel]
38 > debug.repo-filters = yes
38 > debug.repo-filters = yes
39 > [ui]
39 > [ui]
40 > debug = yes
40 > debug = yes
41 > EOF
41 > EOF
42
42
43
43
44 tests
44 tests
45 -----
45 -----
46
46
47 Getting the node of `null`
47 Getting the node of `null`
48
48
49 $ hg log -r null -T "{node}\n"
49 $ hg log -r null -T "{node}\n"
50 0000000000000000000000000000000000000000
50 0000000000000000000000000000000000000000
51
51
52 Getting basic changeset information about `null`
52 Getting basic changeset information about `null`
53
53
54 $ hg log -r null -T "{node}\n{date}\n"
54 $ hg log -r null -T "{node}\n{date}\n"
55 0000000000000000000000000000000000000000
55 0000000000000000000000000000000000000000
56 0.00
56 0.00
57
57
58 Getting status of null
58 Getting status of null
59
59
60 $ hg status --change null
60 $ hg status --change null
61
61
62 Getting status of working copy
62 Getting status of working copy
63
63
64 $ hg status
64 $ hg status
65 M c
65 M c
66 A d
66 A d
67 R a
67 R a
68 ! b
68 ! b
69
69
70 $ hg status --copies
70 $ hg status --copies
71 debug.filters: computing revision filter for "visible"
72 M c
71 M c
73 A d
72 A d
74 R a
73 R a
75 ! b
74 ! b
76
75
77 Getting data about the working copy parent
76 Getting data about the working copy parent
78
77
79 $ hg log -r '.' -T "{node}\n{date}\n"
78 $ hg log -r '.' -T "{node}\n{date}\n"
80 c2932ca7786be30b67154d541a8764fae5532261
79 c2932ca7786be30b67154d541a8764fae5532261
81 0.00
80 0.00
82
81
83 Getting working copy diff
82 Getting working copy diff
84
83
85 $ hg diff
84 $ hg diff
86 diff -r c2932ca7786be30b67154d541a8764fae5532261 a
85 diff -r c2932ca7786be30b67154d541a8764fae5532261 a
87 --- a/a Thu Jan 01 00:00:00 1970 +0000
86 --- a/a Thu Jan 01 00:00:00 1970 +0000
88 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
87 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
89 @@ -1,1 +0,0 @@
88 @@ -1,1 +0,0 @@
90 -a
89 -a
91 diff -r c2932ca7786be30b67154d541a8764fae5532261 c
90 diff -r c2932ca7786be30b67154d541a8764fae5532261 c
92 --- a/c Thu Jan 01 00:00:00 1970 +0000
91 --- a/c Thu Jan 01 00:00:00 1970 +0000
93 +++ b/c Thu Jan 01 00:00:00 1970 +0000
92 +++ b/c Thu Jan 01 00:00:00 1970 +0000
94 @@ -1,1 +1,1 @@
93 @@ -1,1 +1,1 @@
95 -c
94 -c
96 +c1
95 +c1
97 diff -r c2932ca7786be30b67154d541a8764fae5532261 d
96 diff -r c2932ca7786be30b67154d541a8764fae5532261 d
98 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
97 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
99 +++ b/d Thu Jan 01 00:00:00 1970 +0000
98 +++ b/d Thu Jan 01 00:00:00 1970 +0000
100 @@ -0,0 +1,1 @@
99 @@ -0,0 +1,1 @@
101 +d
100 +d
102 $ hg diff --change .
101 $ hg diff --change .
103 diff -r 05293e5dd8d1ae4f84a8520a11c6f97cad26deca -r c2932ca7786be30b67154d541a8764fae5532261 c
102 diff -r 05293e5dd8d1ae4f84a8520a11c6f97cad26deca -r c2932ca7786be30b67154d541a8764fae5532261 c
104 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
103 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
105 +++ b/c Thu Jan 01 00:00:00 1970 +0000
104 +++ b/c Thu Jan 01 00:00:00 1970 +0000
106 @@ -0,0 +1,1 @@
105 @@ -0,0 +1,1 @@
107 +c
106 +c
108 diff -r 05293e5dd8d1ae4f84a8520a11c6f97cad26deca -r c2932ca7786be30b67154d541a8764fae5532261 z
107 diff -r 05293e5dd8d1ae4f84a8520a11c6f97cad26deca -r c2932ca7786be30b67154d541a8764fae5532261 z
109 --- a/z Thu Jan 01 00:00:00 1970 +0000
108 --- a/z Thu Jan 01 00:00:00 1970 +0000
110 +++ b/z Thu Jan 01 00:00:00 1970 +0000
109 +++ b/z Thu Jan 01 00:00:00 1970 +0000
111 @@ -1,2 +1,3 @@
110 @@ -1,2 +1,3 @@
112 some line
111 some line
113 in a
112 in a
114 +file
113 +file
115
114
116 exporting the current changeset
115 exporting the current changeset
117
116
118 $ hg export
117 $ hg export
119 exporting patch:
118 exporting patch:
120 # HG changeset patch
119 # HG changeset patch
121 # User test
120 # User test
122 # Date 0 0
121 # Date 0 0
123 # Thu Jan 01 00:00:00 1970 +0000
122 # Thu Jan 01 00:00:00 1970 +0000
124 # Node ID c2932ca7786be30b67154d541a8764fae5532261
123 # Node ID c2932ca7786be30b67154d541a8764fae5532261
125 # Parent 05293e5dd8d1ae4f84a8520a11c6f97cad26deca
124 # Parent 05293e5dd8d1ae4f84a8520a11c6f97cad26deca
126 c
125 c
127
126
128 diff -r 05293e5dd8d1ae4f84a8520a11c6f97cad26deca -r c2932ca7786be30b67154d541a8764fae5532261 c
127 diff -r 05293e5dd8d1ae4f84a8520a11c6f97cad26deca -r c2932ca7786be30b67154d541a8764fae5532261 c
129 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
128 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
130 +++ b/c Thu Jan 01 00:00:00 1970 +0000
129 +++ b/c Thu Jan 01 00:00:00 1970 +0000
131 @@ -0,0 +1,1 @@
130 @@ -0,0 +1,1 @@
132 +c
131 +c
133 diff -r 05293e5dd8d1ae4f84a8520a11c6f97cad26deca -r c2932ca7786be30b67154d541a8764fae5532261 z
132 diff -r 05293e5dd8d1ae4f84a8520a11c6f97cad26deca -r c2932ca7786be30b67154d541a8764fae5532261 z
134 --- a/z Thu Jan 01 00:00:00 1970 +0000
133 --- a/z Thu Jan 01 00:00:00 1970 +0000
135 +++ b/z Thu Jan 01 00:00:00 1970 +0000
134 +++ b/z Thu Jan 01 00:00:00 1970 +0000
136 @@ -1,2 +1,3 @@
135 @@ -1,2 +1,3 @@
137 some line
136 some line
138 in a
137 in a
139 +file
138 +file
140
139
141 using annotate
140 using annotate
142
141
143 - file with a single change
142 - file with a single change
144
143
145 $ hg annotate a
144 $ hg annotate a
146 0: a
145 0: a
147
146
148 - file with multiple change
147 - file with multiple change
149
148
150 $ hg annotate z
149 $ hg annotate z
151 0: some line
150 0: some line
152 1: in a
151 1: in a
153 2: file
152 2: file