upstream/mercurial-mirror Commit - r44623:782e0d9c

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import collections

10

import collections

11

import multiprocessing

11

import multiprocessing

12

import os

12

import os

13

14

from .i18n import _

14

from .i18n import _

15

16

17

from .revlogutils.flagutil import REVIDX_SIDEDATA

17

from .revlogutils.flagutil import REVIDX_SIDEDATA

18

19

from . import (

19

from . import (

20

error,

20

error,

21

match as matchmod,

21

match as matchmod,

22

node,

22

node,

23

pathutil,

23

pathutil,

24

pycompat,

24

pycompat,

25

util,

25

util,

26

)

26

)

27

28

from .revlogutils import sidedata as sidedatamod

28

from .revlogutils import sidedata as sidedatamod

29

30

from .utils import stringutil

30

from .utils import stringutil

31

32

33

def _filter(src, dst, t):

33

def _filter(src, dst, t):

34

"""filters out invalid copies after chaining"""

34

"""filters out invalid copies after chaining"""

35

36

# When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')

36

# When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')

37

# with copies in 'b' (from 'mid' to 'dst'), we can get the different cases

37

# with copies in 'b' (from 'mid' to 'dst'), we can get the different cases

38

# in the following table (not including trivial cases). For example, case 2

38

# in the following table (not including trivial cases). For example, case 2

39

# is where a file existed in 'src' and remained under that name in 'mid' and

39

# is where a file existed in 'src' and remained under that name in 'mid' and

40

# then was renamed between 'mid' and 'dst'.

40

# then was renamed between 'mid' and 'dst'.

41

#

41

#

42

# case src mid dst result

42

# case src mid dst result

43

# 1 x y - -

43

# 1 x y - -

44

# 2 x y y x->y

44

# 2 x y y x->y

45

# 3 x y x -

45

# 3 x y x -

46

# 4 x y z x->z

46

# 4 x y z x->z

47

# 5 - x y -

47

# 5 - x y -

48

# 6 x x y x->y

48

# 6 x x y x->y

49

#

49

#

50

# _chain() takes care of chaining the copies in 'a' and 'b', but it

50

# _chain() takes care of chaining the copies in 'a' and 'b', but it

51

# cannot tell the difference between cases 1 and 2, between 3 and 4, or

51

# cannot tell the difference between cases 1 and 2, between 3 and 4, or

52

# between 5 and 6, so it includes all cases in its result.

52

# between 5 and 6, so it includes all cases in its result.

53

# Cases 1, 3, and 5 are then removed by _filter().

53

# Cases 1, 3, and 5 are then removed by _filter().

54

55

for k, v in list(t.items()):

55

for k, v in list(t.items()):

56

# remove copies from files that didn't exist

56

# remove copies from files that didn't exist

57

if v not in src:

57

if v not in src:

58

del t[k]

58

del t[k]

59

# remove criss-crossed copies

59

# remove criss-crossed copies

60

elif k in src and v in dst:

60

elif k in src and v in dst:

61

del t[k]

61

del t[k]

62

# remove copies to files that were then removed

62

# remove copies to files that were then removed

63

elif k not in dst:

63

elif k not in dst:

64

del t[k]

64

del t[k]

65

66

67

def _chain(prefix, suffix):
    """Chain two ``{destination: source}`` copy mappings into a new one.

    The result starts as a copy of ``prefix``. Every entry of ``suffix`` is
    then added, with its source looked up in ``prefix`` so that a copy of a
    copy points at the earliest known source. Neither argument is modified.
    """
    chained = prefix.copy()
    chained.update(
        (dest, prefix.get(source, source))
        for dest, source in pycompat.iteritems(suffix)
    )
    return chained

73

74

75

def _tracefile(fctx, am, basemf):

75

def _tracefile(fctx, am, basemf):

76

"""return file context that is the ancestor of fctx present in ancestor

76

"""return file context that is the ancestor of fctx present in ancestor

77

manifest am

77

manifest am

78

79

Note: we used to try and stop after a given limit, however checking if that

79

Note: we used to try and stop after a given limit, however checking if that

80

limit is reached turned out to be very expensive. we are better off

80

limit is reached turned out to be very expensive. we are better off

81

disabling that feature."""

81

disabling that feature."""

82

83

for f in fctx.ancestors():

83

for f in fctx.ancestors():

84

path = f.path()

84

path = f.path()

85

if am.get(path, None) == f.filenode():

85

if am.get(path, None) == f.filenode():

86

return path

86

return path

87

if basemf and basemf.get(path, None) == f.filenode():

87

if basemf and basemf.get(path, None) == f.filenode():

88

return path

88

return path

89

90

91

def _dirstatecopies(repo, match=None):

91

def _dirstatecopies(repo, match=None):

92

ds = repo.dirstate

92

ds = repo.dirstate

93

c = ds.copies().copy()

93

c = ds.copies().copy()

94

for k in list(c):

94

for k in list(c):

95

if ds[k] not in b'anm' or (match and not match(k)):

95

if ds[k] not in b'anm' or (match and not match(k)):

96

del c[k]

96

del c[k]

97

return c

97

return c

98

99

100

def _computeforwardmissing(a, b, match=None):

100

def _computeforwardmissing(a, b, match=None):

101

"""Computes which files are in b but not a.

101

"""Computes which files are in b but not a.

102

This is its own function so extensions can easily wrap this call to see what

102

This is its own function so extensions can easily wrap this call to see what

103

files _forwardcopies is about to process.

103

files _forwardcopies is about to process.

104

"""

104

"""

105

ma = a.manifest()

105

ma = a.manifest()

106

mb = b.manifest()

106

mb = b.manifest()

107

return mb.filesnotin(ma, match=match)

107

return mb.filesnotin(ma, match=match)

108

109

110

def usechangesetcentricalgo(repo):
    """Tell whether the changeset-centric copy algorithms should be used.

    True when ``repo.filecopiesmode`` is ``b'changeset-sidedata'``, or when
    the ``experimental.copies.read-from`` config value is either
    ``b'changeset-only'`` or ``b'compatibility'``.
    """
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    return repo.ui.config(b'experimental', b'copies.read-from') in (
        b'changeset-only',
        b'compatibility',
    )

117

118

119

def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a ``{destination: source}`` dict. When the changeset-centric
    algorithm is enabled the work is delegated to _changesetforwardcopies();
    otherwise every file present in ``b`` but missing from ``a`` is traced
    back through its file ancestors with _tracefile().
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    ui = repo.ui
    debug = ui.debugflag and ui.configbool(b'devel', b'debug.copies')
    dbg = ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = base.manifest() if base is not None else None

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    copymap = {}

    # Computing the forward missing is quite expensive on large manifests,
    # since it compares the entire manifests. We can optimize it in the common
    # use case of computing what copies are in a commit versus its parent
    # (like during a rebase or histedit). Note, we exclude merge commits from
    # this optimization, since the ctx.files() for a merge commit is not
    # correct for this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        forwardmissingmatch = matchmod.intersectmatchers(
            match, matchmod.exact(b.files())
        )
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for filename in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % filename)
        fctx = b[filename]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            # 'start' is only read back in the debug branch at the end of the
            # iteration
            start = util.timer()
        origin = _tracefile(fctx, am, basemf)
        if origin:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % origin)
            copymap[filename] = origin
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return copymap

176

177

178

def _revinfogetter(repo):

178

def _revinfogetter(repo):

179

"""return a function that return multiple data given a <rev>"i

179

"""return a function that return multiple data given a <rev>"i

180

181

* p1: revision number of first parent

181

* p1: revision number of first parent

182

* p2: revision number of first parent

182

* p2: revision number of first parent

183

* p1copies: mapping of copies from p1

183

* p1copies: mapping of copies from p1

184

* p2copies: mapping of copies from p2

184

* p2copies: mapping of copies from p2

185

* removed: a list of removed files

185

* removed: a list of removed files

186

"""

186

"""

187

cl = repo.changelog

187

cl = repo.changelog

188

parents = cl.parentrevs

188

parents = cl.parentrevs

189

190

if repo.filecopiesmode == b'changeset-sidedata':

190

if repo.filecopiesmode == b'changeset-sidedata':

191

changelogrevision = cl.changelogrevision

191

changelogrevision = cl.changelogrevision

192

flags = cl.flags

192

flags = cl.flags

193

194

# A small cache to avoid doing the work twice for merges

194

# A small cache to avoid doing the work twice for merges

195

#

195

#

196

# In the vast majority of cases, if we ask information for a revision

196

# In the vast majority of cases, if we ask information for a revision

197

# about 1 parent, we'll later ask it for the other. So it make sense to

197

# about 1 parent, we'll later ask it for the other. So it make sense to

198

# keep the information around when reaching the first parent of a merge

198

# keep the information around when reaching the first parent of a merge

199

# and dropping it after it was provided for the second parents.

199

# and dropping it after it was provided for the second parents.

200

#

200

#

201

# It exists cases were only one parent of the merge will be walked. It

201

# It exists cases were only one parent of the merge will be walked. It

202

# happens when the "destination" the copy tracing is descendant from a

202

# happens when the "destination" the copy tracing is descendant from a

203

# new root, not common with the "source". In that case, we will only walk

203

# new root, not common with the "source". In that case, we will only walk

204

# through merge parents that are descendant of changesets common

204

# through merge parents that are descendant of changesets common

205

# between "source" and "destination".

205

# between "source" and "destination".

206

#

206

#

207

# With the current case implementation if such changesets have a copy

207

# With the current case implementation if such changesets have a copy

208

# information, we'll keep them in memory until the end of

208

# information, we'll keep them in memory until the end of

209

# _changesetforwardcopies. We don't expect the case to be frequent

209

# _changesetforwardcopies. We don't expect the case to be frequent

210

# enough to matters.

210

# enough to matters.

211

#

211

#

212

# In addition, it would be possible to reach pathological case, were

212

# In addition, it would be possible to reach pathological case, were

213

# many first parent are met before any second parent is reached. In

213

# many first parent are met before any second parent is reached. In

214

# that case the cache could grow. If this even become an issue one can

214

# that case the cache could grow. If this even become an issue one can

215

# safely introduce a maximum cache size. This would trade extra CPU/IO

215

# safely introduce a maximum cache size. This would trade extra CPU/IO

216

# time to save memory.

216

# time to save memory.

217

merge_caches = {}

217

merge_caches = {}

218

219

def revinfo(rev):

219

def revinfo(rev):

220

p1, p2 = parents(rev)

220

p1, p2 = parents(rev)

221

if flags(rev) & REVIDX_SIDEDATA:

221

if flags(rev) & REVIDX_SIDEDATA:

222

e = merge_caches.pop(rev, None)

222

e = merge_caches.pop(rev, None)

223

if e is not None:

223

if e is not None:

224

return e

224

return e

225

c = changelogrevision(rev)

225

c = changelogrevision(rev)

226

p1copies = c.p1copies

226

p1copies = c.p1copies

227

p2copies = c.p2copies

227

p2copies = c.p2copies

228

removed = c.filesremoved

228

removed = c.filesremoved

229

if p1 != node.nullrev and p2 != node.nullrev:

229

if p1 != node.nullrev and p2 != node.nullrev:

230

# XXX some case we over cache, IGNORE

230

# XXX some case we over cache, IGNORE

231

merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)

231

merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)

232

else:

232

else:

233

p1copies = {}

233

p1copies = {}

234

p2copies = {}

234

p2copies = {}

235

removed = []

235

removed = []

236

return p1, p2, p1copies, p2copies, removed

236

return p1, p2, p1copies, p2copies, removed

237

238

else:

238

else:

239

240

def revinfo(rev):

240

def revinfo(rev):

241

p1, p2 = parents(rev)

241

p1, p2 = parents(rev)

242

ctx = repo[rev]

242

ctx = repo[rev]

243

p1copies, p2copies = ctx._copies

243

p1copies, p2copies = ctx._copies

244

removed = ctx.filesremoved()

244

removed = ctx.filesremoved()

245

return p1, p2, p1copies, p2copies, removed

245

return p1, p2, p1copies, p2copies, removed

246

247

return revinfo

247

return revinfo

248

249

250

def _changesetforwardcopies(a, b, match):
    """Compute forward copies from ``a`` to ``b`` from changeset-level data.

    Collects the revisions that lie between ``a`` and ``b``, builds a
    ``{parent: [children]}`` mapping for them and hands the walk over to
    _combinechangesetcopies(). Returns an empty dict when ``a`` is the null
    revision, when ``a`` and ``b`` are the same revision, or when no root
    revision to track copies from is found.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for rev in missingrevs:
        for parent in cl.parentrevs(rev):
            if parent == node.nullrev:
                continue
            children.setdefault(parent, []).append(rev)
            if parent not in mrset:
                # a parent outside of the missing set is a starting point
                roots.add(parent)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = (from_head & mrset) | roots
    # the target itself is handled as a child, never as a visited revision
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)
    return _combinechangesetcopies(revs, children, b.rev(), revinfo, match)

287

288

289

def _combinechangesetcopies(revs, children, targetrev, revinfo, match):

289

def _combinechangesetcopies(revs, children, targetrev, revinfo, match):

290

"""combine the copies information for each item of iterrevs

290

"""combine the copies information for each item of iterrevs

291

292

revs: sorted iterable of revision to visit

292

revs: sorted iterable of revision to visit

293

children: a {parent: [children]} mapping.

293

children: a {parent: [children]} mapping.

294

targetrev: the final copies destination revision (not in iterrevs)

294

targetrev: the final copies destination revision (not in iterrevs)

295

revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)

295

revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)

296

match: a matcher

296

match: a matcher

297

298

It returns the aggregated copies information for `targetrev`.

298

It returns the aggregated copies information for `targetrev`.

299

"""

299

"""

300

all_copies = {}

300

all_copies = {}

301

alwaysmatch = match.always()

301

alwaysmatch = match.always()

302

for r in revs:

302

for r in revs:

303

copies = all_copies.pop(r, None)

303

copies = all_copies.pop(r, None)

304

if copies is None:

304

if copies is None:

305

# this is a root

305

# this is a root

306

copies = {}

306

copies = {}

307

for i, c in enumerate(children[r]):

307

for i, c in enumerate(children[r]):

308

p1, p2, p1copies, p2copies, removed = revinfo(c)

308

p1, p2, p1copies, p2copies, removed = revinfo(c)

309

if r == p1:

309

if r == p1:

310

parent = 1

310

parent = 1

311

childcopies = p1copies

311

childcopies = p1copies

312

else:

312

else:

313

assert r == p2

313

assert r == p2

314

parent = 2

314

parent = 2

315

childcopies = p2copies

315

childcopies = p2copies

316

if not alwaysmatch:

316

if not alwaysmatch:

317

childcopies = {

317

childcopies = {

318

dst: src for dst, src in childcopies.items() if match(dst)

318

dst: src for dst, src in childcopies.items() if match(dst)

319

}

319

}

320

newcopies = copies

320

newcopies = copies

321

if childcopies:

321

if childcopies:

322

newcopies = _chain(newcopies, childcopies)

322

newcopies = _chain(newcopies, childcopies)

323

# _chain makes a copies, we can avoid doing so in some

323

# _chain makes a copies, we can avoid doing so in some

324

# simple/linear cases.

324

# simple/linear cases.

325

assert newcopies is not copies

325

assert newcopies is not copies

326

for f in removed:

326

for f in removed:

327

if f in newcopies:

327

if f in newcopies:

328

if newcopies is copies:

328

if newcopies is copies:

329

# copy on write to avoid affecting potential other

329

# copy on write to avoid affecting potential other

330

# branches. when there are no other branches, this

330

# branches. when there are no other branches, this

331

# could be avoided.

331

# could be avoided.

332

newcopies = copies.copy()

332

newcopies = copies.copy()

333

del newcopies[f]

333

del newcopies[f]

334

othercopies = all_copies.get(c)

334

othercopies = all_copies.get(c)

335

if othercopies is None:

335

if othercopies is None:

336

all_copies[c] = newcopies

336

all_copies[c] = newcopies

337

else:

337

else:

338

# we are the second parent to work on c, we need to merge our

338

# we are the second parent to work on c, we need to merge our

339

# work with the other.

339

# work with the other.

340

#

340

#

341

# Unlike when copies are stored in the filelog, we consider

341

# Unlike when copies are stored in the filelog, we consider

342

# it a copy even if the destination already existed on the

342

# it a copy even if the destination already existed on the

343

# other branch. It's simply too expensive to check if the

343

# other branch. It's simply too expensive to check if the

344

# file existed in the manifest.

344

# file existed in the manifest.

345

#

345

#

346

# In case of conflict, parent 1 take precedence over parent 2.

346

# In case of conflict, parent 1 take precedence over parent 2.

347

# This is an arbitrary choice made anew when implementing

347

# This is an arbitrary choice made anew when implementing

348

# changeset based copies. It was made without regards with

348

# changeset based copies. It was made without regards with

349

# potential filelog related behavior.

349

# potential filelog related behavior.

350

if parent == 1:

350

if parent == 1:

351

othercopies.update(newcopies)

351

othercopies.update(newcopies)

352

else:

352

else:

353

newcopies.update(othercopies)

353

newcopies.update(othercopies)

354

all_copies[c] = newcopies

354

all_copies[c] = newcopies

355

return all_copies[targetrev]

355

return all_copies[targetrev]

356

357

358

def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b

    ``base`` defaults to ``a``. ``match`` is first narrowed through
    ``a.repo().narrowmatch()``. When ``b`` is the working copy
    (``b.rev()`` is None) the committed copies up to ``b.p1()`` are chained
    with the copies recorded in the dirstate.
    """
    if base is None:
        base = a
    match = a.repo().narrowmatch(match)

    if b.rev() is not None:
        return _committedforwardcopies(a, b, base, match)

    # working copy: combine copies from dirstate if necessary
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))

372

373

374

def _backwardrenames(a, b, match):

374

def _backwardrenames(a, b, match):

375

if a._repo.ui.config(b'experimental', b'copytrace') == b'off':

375

if a._repo.ui.config(b'experimental', b'copytrace') == b'off':

376

return {}

376

return {}

377

378

# Even though we're not taking copies into account, 1:n rename situations

378

# Even though we're not taking copies into account, 1:n rename situations

379

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

379

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

380

# arbitrarily pick one of the renames.

380

# arbitrarily pick one of the renames.

381

# We don't want to pass in "match" here, since that would filter

381

# We don't want to pass in "match" here, since that would filter

382

# the destination by it. Since we're reversing the copies, we want

382

# the destination by it. Since we're reversing the copies, we want

383

# to filter the source instead.

383

# to filter the source instead.

384

f = _forwardcopies(b, a)

384

f = _forwardcopies(b, a)

385

r = {}

385

r = {}

386

for k, v in sorted(pycompat.iteritems(f)):

386

for k, v in sorted(pycompat.iteritems(f)):

387

if match and not match(v):

387

if match and not match(v):

388

continue

388

continue

389

# remove copies

389

# remove copies

390

if v in a:

390

if v in a:

391

continue

391

continue

392

r[v] = k

392

r[v] = k

393

return r

393

return r

394

395

396

def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    Depending on how ``x`` and ``y`` relate to their common ancestor, the
    mapping is computed forward (``x`` is the ancestor), backward (``y`` is
    the ancestor) or as a chain of both. The result is passed through
    _filter() before being returned. An empty dict is returned when ``x``
    equals ``y`` or when either context is falsy.
    """
    repo = x._repo
    ui = repo.ui
    debug = ui.debugflag and ui.configbool(b'devel', b'debug.copies')

    def note(message):
        # emit a search-mode trace only when copy debugging is enabled
        if debug:
            ui.debug(message)

    if debug:
        ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}

    a = y.ancestor(x)
    if a == x:
        note(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        note(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        note(b'debug.copies: search mode: combined\n')
        base = x if a.rev() != node.nullrev else None
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies

430

431

432

def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the
    following message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across
    # a rebase.
    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so copytracing stays enabled in such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # In heuristics mode, still do full copytracing if only non-public
    # revisions are involved as that will be fast enough and will also cover
    # the copies which could be missed by heuristics
    if copytracing == b'heuristics' and not _isfullcopytraceable(
        repo, c1, base
    ):
        return _heuristicscopytracing(repo, c1, c2, base)
    return _fullcopytracing(repo, c1, c2, base)

510

511

512

def _isfullcopytraceable(repo, c1, base):
    """Tell whether the full copytracing algorithm is cheap enough to use.

    Full copytracing is only considered when both ``c1`` and ``base`` are
    mutable (non-public).  In that case the number of changesets in
    ``base::c1`` is compared against the
    `experimental.copytrace.sourcecommitlimit` config value, and the full
    algorithm is allowed only while the count stays strictly below it.

    Returns True when full copytracing should be used, False otherwise.
    """
    # A context without a revision number is replaced by its first parent
    # (presumably this is the working copy -- confirm against callers).
    if c1.rev() is None:
        c1 = c1.p1()

    if not (c1.mutable() and base.mutable()):
        return False

    limit = repo.ui.configint(b'experimental', b'copytrace.sourcecommitlimit')
    span = repo.revs(b'%d::%d', base.rev(), c1.rev())
    return len(span) < limit

530

531

532

def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    """Classify a source that was copied/renamed on one side only.

    ``dsts1`` are the destinations of ``src`` on the side that did the copy
    (manifest ``m1``); ``m2``/``c2`` describe the other side and ``mb``/
    ``base`` the merge base.  Results are recorded in place:

    - ``renamedelete[src]`` is set when ``src`` is absent from both sides
      (renamed on one side, deleted on the other);
    - ``copy[dst]`` is set for each destination missing from ``m2`` when the
      other side modified ``src`` and its version is related to the base one.
    """
    if src not in m2:
        # deleted on the other side
        if src not in m1:
            # renamed on this side, deleted on the other one
            renamedelete[src] = dsts1
        return

    # Only a modification on the other side of a file still related to the
    # base version needs to be carried over to the copy destinations.
    if m2[src] != mb[src] and _related(c2[src], base[src]):
        for target in dsts1:
            if target in m2:
                # destination also added on the other side: left to the
                # regular "both created" handling in manifestmerge
                continue
            copy[target] = src

549

550

551

def _fullcopytracing(repo, c1, c2, base):
    """Run the exhaustive copy tracing between ``base`` and both merge sides.

    Copies from ``base`` to ``c1`` and from ``base`` to ``c2`` are computed
    with pathcopies() and then classified per source file.  This is slow
    when many changesets are involved, but no copy is missed.

    Returns a 5-tuple ``(copy, movewithdir, diverge, renamedelete, dirmove)``:

    - ``copy``: ``{dst: src}`` copies to merge;
    - ``movewithdir``: ``{file: newpath}`` for files added in a directory
      that was moved;
    - ``diverge``: ``{src: [dst, ...]}`` for sources renamed to different
      names on each side;
    - ``renamedelete``: ``{src: [dst, ...]}`` for sources renamed on one
      side and deleted on the other;
    - ``dirmove``: ``{olddir/: newdir/}`` detected directory moves.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)
    if not copies1 and not copies2:
        return {}, {}, {}, {}, {}

    # Invert the {dst: src} maps into {src: [dst, ...]}.
    bysource1 = collections.defaultdict(list)
    for dst, src in copies1.items():
        bysource1[src].append(dst)
    bysource2 = collections.defaultdict(list)
    for dst, src in copies2.items():
        bysource2[src].append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    for src in set(bysource1) | set(bysource2):
        # .get() so that probing does not create empty entries
        dsts1 = bysource1.get(src)
        dsts2 = bysource2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            gone1 = src not in m1
            gone2 = src not in m2
            if gone1 and gone2:
                # renamed on both sides.  Any overlap in the destinations
                # means the renames are not considered divergent: e.g. side
                # 1 copies 'a' to 'b' and 'c' and deletes 'a', while side 2
                # copies 'a' to 'c' and 'd' and deletes 'a'.
                targets1, targets2 = set(dsts1), set(dsts2)
                shared = targets1 & targets2
                if shared:
                    copy.update(dict.fromkeys(shared, src))
                else:
                    diverge[src] = sorted(targets1 | targets2)
            elif not gone1 and not gone2:
                # copied on both sides
                copy.update(dict.fromkeys(set(dsts1) & set(dsts2), src))
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    for side, unmatched in ((b'local', u1), (b'other', u2)):
        if unmatched:
            repo.ui.debug(
                b"%s:\n %s\n" % (header % side, b"\n ".join(unmatched))
            )

    fullcopy = copies1.copy()
    fullcopy.update(copies2)

    if repo.ui.debugflag:
        # These flattened destination sets only feed the debug listing, so
        # they are built only when debug output is enabled.
        divergeset = set().union(*diverge.values())
        renamedeleteset = set().union(*renamedelete.values())

        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        markers = (
            (b"*", copy),
            (b"!", divergeset),
            (b"%", renamedeleteset),
        )
        for f in sorted(fullcopy):
            note = b"".join(mark for mark, group in markers if f in group)
            repo.ui.debug(
                b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
        del markers
        del renamedeleteset
        del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        if (
            # directory wasn't entirely moved locally
            (dsrc in d1 and ddst in d1)
            # directory wasn't entirely moved remotely
            or (dsrc in d2 and ddst in d2)
            # files from the same directory moved to two different places
            or (dsrc in dirmove and dirmove[dsrc] != ddst)
        ):
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for rejected in invalid:
        dirmove.pop(rejected, None)
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # Trailing slashes make the prefix test below match whole directories.
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for olddir, newdir in pycompat.iteritems(dirmove):
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (olddir, newdir)
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f in fullcopy:
            continue
        for d in dirmove:
            if not f.startswith(d):
                continue
            # new file added in a directory that was moved, move it
            df = dirmove[d] + f[len(d) :]
            if df not in copy:
                movewithdir[f] = df
                repo.ui.debug(
                    b" pending file src: '%s' -> dst: '%s'\n" % (f, df)
                )
            break

    return copy, movewithdir, diverge, renamedelete, dirmove

716

717

718

def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very
    large in number and that will make the algorithm slow. The number of
    possible candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit`.

    Returns the same 5-tuple shape as _fullcopytracing(); only the first
    element (``{dst: src}`` copies) is ever populated here.
    """

    # Contexts without a revision number are replaced by their first parent.
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # Walk first parents from c2 down to base, collecting touched files.
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # Copies recorded on the source branch whose source still exists in c1.
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = [
        f for f in changedfiles if f not in m1 and f in base and f in c2
    ]
    if not missingfiles:
        return copies, {}, {}, {}, {}

    # Index the files added on the c1 side by basename and by directory.
    basenametofilename = collections.defaultdict(list)
    dirnametofilename = collections.defaultdict(list)
    for f in m1.filesnotin(base.manifest()):
        basenametofilename[os.path.basename(f)].append(f)
        dirnametofilename[os.path.dirname(f)].append(f)

    # we can have a lot of candidates which can slow down the heuristics
    # config value to limit the number of candidates moves to check
    # (the value does not depend on the file, so it is read once up front)
    maxcandidates = repo.ui.configint(
        b'experimental', b'copytrace.movecandidateslimit'
    )

    for f in missingfiles:
        samebasename = basenametofilename[os.path.basename(f)]
        samedirname = dirnametofilename[os.path.dirname(f)]
        movecandidates = samebasename + samedirname
        # f is guaranteed to be present in c2, that's why
        # c2.filectx(f) won't fail
        f2 = c2.filectx(f)

        if len(movecandidates) > maxcandidates:
            repo.ui.status(
                _(
                    b"skipping copytracing for '%s', more "
                    b"candidates than the limit: %d\n"
                )
                % (f, len(movecandidates))
            )
            continue

        for candidate in movecandidates:
            f1 = c1.filectx(candidate)
            if _related(f1, f2):
                # if there are a few related copies then we'll merge
                # changes into all of them. This matches the behaviour
                # of upstream copytracing
                copies[candidate] = f

    return copies, {}, {}, {}, {}

825

826

827

def _related(f1, f2):
    """Return True if the file contexts f1 and f2 share a common ancestor.

    Both ancestor chains are walked back in lockstep, always advancing the
    side with the higher linkrev, until both sit on the same linkrev; the
    files are related exactly when they are then equal.  A linkrev of None
    (per the original note, a working-copy file context, which can only
    occur for the initial f1/f2) cannot be compared with integers, so such
    a context is first replaced by its first ancestor.

    Returns False when either ancestor chain is exhausted first.
    """
    if f1 == f2:
        return True  # a match

    ancestors1 = f1.ancestors()
    ancestors2 = f2.ancestors()
    try:
        rev1, rev2 = f1.linkrev(), f2.linkrev()
        if rev1 is None:
            f1 = next(ancestors1)
        if rev2 is None:
            f2 = next(ancestors2)

        while True:
            rev1, rev2 = f1.linkrev(), f2.linkrev()
            if rev1 > rev2:
                f1 = next(ancestors1)
                continue
            if rev2 > rev1:
                f2 = next(ancestors2)
                continue
            # both point to files in the same linkrev: related only if
            # they are the very same file
            return f1 == f2
    except StopIteration:
        # one history ran out before the two met
        return False

858

859

860

def graftcopies(wctx, ctx, base):
    """Reproduce in ``wctx`` the copies made between ``base`` and ``ctx``.

    In contrast to mergecopies(), only copies between base and ctx are
    considered (copies between base and wctx are ignored), and the copies
    are applied to the working copy rather than returned.  That makes this
    cheaper (notably in the common base==ctx.p1() case) and usable even
    with experimental.copytrace=off.

    merge.update() will already have marked most copies, but only when it
    considers the source files related (see merge._related()) and only when
    the file was modified on the local side.  This function marks the
    copies that merge.update() "missed".
    """
    grafted = pathcopies(base, ctx)
    # _filter() prunes the mapping in place relative to the working copy
    # parent and the working copy itself.
    _filter(wctx.p1(), wctx, grafted)
    for target, origin in pycompat.iteritems(grafted):
        wctx[target].markcopied(origin)

880

881

882

def computechangesetfilesadded(ctx):
    """Return the list of files added in a changeset.

    A file counts as added when it is listed in ``ctx.files()`` and is
    present in none of the parents of ``ctx``.
    """
    return [
        name
        for name in ctx.files()
        if not any(name in parent for parent in ctx.parents())
    ]

890

891

892

def computechangesetfilesremoved(ctx):
    """Return the list of files removed in a changeset.

    A file counts as removed when it is listed in ``ctx.files()`` but is
    not present in ``ctx`` itself.
    """
    return [name for name in ctx.files() if name not in ctx]

900

901

902

def computechangesetcopies(ctx):
    """Return the copies data for a changeset.

    The result is a pair of dictionaries ``(p1copies, p2copies)``, each of
    the form ``{newname: oldname}``.  A copy is attributed to the first
    parent (p1 is tried before p2) that contains the source file at the
    recorded file node; copies matching neither parent are dropped.
    """
    p1copies = {}
    p2copies = {}
    # (parent, bucket) pairs in priority order: p1 wins over p2.
    sides = ((ctx.p1(), p1copies), (ctx.p2(), p2copies))
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # skip files outside the narrow spec and files removed by ctx
        if narrowmatch(dst) and dst in ctx:
            copied = ctx[dst].renamed()
            if copied:
                src, srcnode = copied
                for parent, bucket in sides:
                    if src in parent and parent[src].filenode() == srcnode:
                        bucket[dst] = src
                        break
    return p1copies, p2copies

926

927

928

def encodecopies(files, copies):
    """Serialize a ``{dst: src}`` copy mapping against a list of files.

    Each copy whose destination appears in ``files`` becomes one
    ``<index>\\0<source>`` record, where ``<index>`` is the position of the
    destination in ``files``; records are joined with newlines.

    Raises error.ProgrammingError when the number of records differs from
    the number of copies (some destination is not listed in ``files``).
    """
    records = [
        b'%d\0%s' % (position, copies[name])
        for position, name in enumerate(files)
        if name in copies
    ]
    if len(records) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(records)

938

939

940

def decodecopies(files, data):
    """Decode copy records produced by encodecopies().

    ``data`` is a newline-separated list of ``<index>\\0<source>`` records,
    where ``<index>`` refers to a position in ``files``.

    Returns the ``{dst: src}`` mapping (empty when ``data`` is empty), or
    None when ``data`` is malformed: a record that does not split into
    exactly two fields, a non-integer index, or an index outside
    ``files``.
    """
    try:
        copies = {}
        if not data:
            return copies
        for l in data.split(b'\n'):
            strindex, src = l.split(b'\0')
            i = int(strindex)
            # A negative index would silently wrap around to the end of the
            # list; treat it as invalid data, like decodefileindices() does.
            if i < 0 or i >= len(files):
                return None
            copies[files[i]] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None

955

956

957

def encodefileindices(files, subset):
    """Encode ``subset`` as the newline-joined positions of its members.

    Positions refer to ``files`` and are emitted in ``files`` order as
    decimal byte strings; members of ``subset`` absent from ``files`` are
    ignored.
    """
    wanted = set(subset)
    return b'\n'.join(
        [b'%d' % position for position, name in enumerate(files) if name in wanted]
    )

964

965

966

def decodefileindices(files, data):
    """Decode data produced by encodefileindices() back into file names.

    ``data`` is a newline-separated list of decimal positions in ``files``.

    Returns the list of referenced files (empty when ``data`` is empty), or
    None when an entry is not an integer or falls outside ``files``.
    """
    try:
        selected = []
        if data:
            for token in data.split(b'\n'):
                position = int(token)
                if not (0 <= position < len(files)):
                    return None
                selected.append(files[position])
        return selected
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None

981

982

983

def _getsidedata(srcrepo, rev):
    """Compute the copy-related sidedata map for revision ``rev``.

    The p1/p2 copies and the added/removed file lists of the changeset are
    encoded relative to its sorted file list.  Only non-empty encoded
    values are stored, under the SD_P1COPIES, SD_P2COPIES, SD_FILESADDED
    and SD_FILESREMOVED sidedata keys.

    Returns the (possibly empty) ``{key: encoded bytes}`` dictionary.
    """
    ctx = srcrepo[rev]
    p1copies, p2copies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    # Test each collection individually: the (p1copies, p2copies) pair is a
    # non-empty tuple and therefore always truthy, which made the previous
    # combined check unable to ever skip the encoding work.
    if not (p1copies or p2copies or filesadded or filesremoved):
        return sidedata

    sortedfiles = sorted(ctx.files())
    entries = (
        (sidedatamod.SD_P1COPIES, encodecopies(sortedfiles, p1copies)),
        (sidedatamod.SD_P2COPIES, encodecopies(sortedfiles, p2copies)),
        (sidedatamod.SD_FILESADDED, encodefileindices(sortedfiles, filesadded)),
        (
            sidedatamod.SD_FILESREMOVED,
            encodefileindices(sortedfiles, filesremoved),
        ),
    )
    for key, encoded in entries:
        # empty encodings carry no information and are not stored
        if encoded:
            sidedata[key] = encoded
    return sidedata

1005

1006

1007

def getsidedataadder(srcrepo, destrepo):
    """Return the sidedata adder suited to the platform and configuration.

    The worker-based adder is used only when the
    `experimental.worker.repository-upgrade` config is enabled and the
    platform is not Windows; the simple adder is used otherwise.
    """
    workers_enabled = srcrepo.ui.configbool(
        b'experimental', b'worker.repository-upgrade'
    )
    if workers_enabled and not pycompat.iswindows:
        return _get_worker_sidedata_adder(srcrepo, destrepo)
    return _get_simple_sidedata_adder(srcrepo, destrepo)

1013

1014

1015

def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):

1016

def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):

1016

"""The function used by worker precomputing sidedata

1017

"""The function used by worker precomputing sidedata

1017

1018

It read an input queue containing revision numbers

1019

It read an input queue containing revision numbers

1019

It write in an output queue containing (rev, <sidedata-map>)

1020

It write in an output queue containing (rev, <sidedata-map>)

1020

1021

The `None` input value is used as a stop signal.

1022

The `None` input value is used as a stop signal.

1022

1023

The `tokens` semaphore is used to avoid having too many unprocessed

1024

The `tokens` semaphore is used to avoid having too many unprocessed

1024

entries. The workers needs to acquire one token before fetching a task.

1025

entries. The workers needs to acquire one token before fetching a task.

1025

They will be released by the consumer of the produced data.

1026

They will be released by the consumer of the produced data.

1026

"""

1027

"""

1027

tokens.acquire()

1028

tokens.acquire()

1028

rev = revs_queue.get()

1029

rev = revs_queue.get()

1029

while rev is not None:

1030

while rev is not None:

1030

data = _getsidedata(srcrepo, rev)

1031

data = _getsidedata(srcrepo, rev)

1031

sidedata_queue.put((rev, data))

1032

sidedata_queue.put((rev, data))

1032

tokens.acquire()

1033

tokens.acquire()

1033

rev = revs_queue.get()

1034

rev = revs_queue.get()

1034

# processing of `None` is completed, release the token.

1035

# processing of `None` is completed, release the token.

1035

tokens.release()

1036

tokens.release()

1036

1037

1038

BUFF_PER_WORKER = 50

1039

BUFF_PER_WORKER = 50

1039

1040

1041

def _get_worker_sidedata_adder(srcrepo, destrepo):

1042

def _get_worker_sidedata_adder(srcrepo, destrepo):

1042

"""The parallel version of the sidedata computation

1043

"""The parallel version of the sidedata computation

1043

1044

This code spawn a pool of worker that precompute a buffer of sidedata

1045

This code spawn a pool of worker that precompute a buffer of sidedata

1045

before we actually need them"""

1046

before we actually need them"""

1046

# avoid circular import copies -> scmutil -> worker -> copies

1047

# avoid circular import copies -> scmutil -> worker -> copies

1047

from . import worker

1048

from . import worker

1048

1049

nbworkers = worker._numworkers(srcrepo.ui)

1050

nbworkers = worker._numworkers(srcrepo.ui)

1050

1051

tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)

1052

tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)

1052

revsq = multiprocessing.Queue()

1053

revsq = multiprocessing.Queue()

1053

sidedataq = multiprocessing.Queue()

1054

sidedataq = multiprocessing.Queue()

1054

1055

assert srcrepo.filtername is None

1056

assert srcrepo.filtername is None

1056

# queue all tasks beforehand, revision numbers are small and it make

1057

# queue all tasks beforehand, revision numbers are small and it make

1057

# synchronisation simpler

1058

# synchronisation simpler

1058

#

1059

#

1059

# Since the computation for each node can be quite expensive, the overhead

1060

# Since the computation for each node can be quite expensive, the overhead

1060

# of using a single queue is not relevant. In practice, most computation

1061

# of using a single queue is not relevant. In practice, most computation

1061

# are fast but some are very expensive and dominate all the other smaller

1062

# are fast but some are very expensive and dominate all the other smaller

1062

# cost.

1063

# cost.

1063

for r in srcrepo.changelog.revs():

1064

for r in srcrepo.changelog.revs():

1064

revsq.put(r)

1065

revsq.put(r)

1065

# queue the "no more tasks" markers

1066

# queue the "no more tasks" markers

1066

for i in range(nbworkers):

1067

for i in range(nbworkers):

1067

revsq.put(None)

1068

revsq.put(None)

1068

1069

allworkers = []

1070

allworkers = []

1070

for i in range(nbworkers):

1071

for i in range(nbworkers):

1071

args = (srcrepo, revsq, sidedataq, tokens)

1072

args = (srcrepo, revsq, sidedataq, tokens)

1072

w = multiprocessing.Process(target=_sidedata_worker, args=args)

1073

w = multiprocessing.Process(target=_sidedata_worker, args=args)

1073

allworkers.append(w)

1074

allworkers.append(w)

1074

w.start()

1075

w.start()

1075

1076

# dictionary to store results for revisions higher than the one we are

1077

# dictionary to store results for revisions higher than the one we are

1077

# looking for. For example, if we need the sidedatamap for 42, and 43 is

1078

# looking for. For example, if we need the sidedatamap for 42, and 43 is

1078

# received, we shelve 43 for later use.

1079

# received, we shelve 43 for later use.

1079

staging = {}

1080

staging = {}

1080

1081

def sidedata_companion(revlog, rev):

1082

def sidedata_companion(revlog, rev):

1082

sidedata = {}

1083

sidedata = {}

1083

if util.safehasattr(revlog, b'filteredrevs'): # this is a changelog

1084

if util.safehasattr(revlog, b'filteredrevs'): # this is a changelog

1084

# Is the data previously shelved ?

1085

# Is the data previously shelved ?

1085

sidedata = staging.pop(rev, None)

1086

sidedata = staging.pop(rev, None)

1086

if sidedata is None:

1087

if sidedata is None:

1087

# look at the queued result until we find the one we are looking

1088

# look at the queued result until we find the one we are looking

1088

# for (shelve the other ones)

1089

# for (shelve the other ones)

1089

r, sidedata = sidedataq.get()

1090

r, sidedata = sidedataq.get()

1090

while r != rev:

1091

while r != rev:

1091

staging[r] = sidedata

1092

staging[r] = sidedata

1092

r, sidedata = sidedataq.get()

1093

r, sidedata = sidedataq.get()

1093

tokens.release()

1094

tokens.release()

1094

return False, (), sidedata

1095

return False, (), sidedata

1095

1096

return sidedata_companion

1097

return sidedata_companion

1097

1098

1099

def _get_simple_sidedata_adder(srcrepo, destrepo):

1100

def _get_simple_sidedata_adder(srcrepo, destrepo):

1100

"""The simple version of the sidedata computation

1101

"""The simple version of the sidedata computation

1101

1102

It just compute it in the same thread on request"""

1103

It just compute it in the same thread on request"""

1103

1104

def sidedatacompanion(revlog, rev):

1105

def sidedatacompanion(revlog, rev):

1105

sidedata = {}

1106

sidedata = {}

1106

if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog

1107

if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog

1107

sidedata = _getsidedata(srcrepo, rev)

1108

sidedata = _getsidedata(srcrepo, rev)

1108

return False, (), sidedata

1109

return False, (), sidedata

1109

1110

return sidedatacompanion

1111

return sidedatacompanion

1111

1112

1113

def getsidedataremover(srcrepo, destrepo):

1114

def getsidedataremover(srcrepo, destrepo):

1114

def sidedatacompanion(revlog, rev):

1115

def sidedatacompanion(revlog, rev):

1115

f = ()

1116

f = ()

1116

if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog

1117

if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog

1117

if revlog.flags(rev) & REVIDX_SIDEDATA:

1118

if revlog.flags(rev) & REVIDX_SIDEDATA:

1118

f = (

1119

f = (

1119

sidedatamod.SD_P1COPIES,

1120

sidedatamod.SD_P1COPIES,

1120

sidedatamod.SD_P2COPIES,

1121

sidedatamod.SD_P2COPIES,

1121

sidedatamod.SD_FILESADDED,

1122

sidedatamod.SD_FILESADDED,

1122

sidedatamod.SD_FILESREMOVED,

1123

sidedatamod.SD_FILESREMOVED,

1123

)

1124

)

1124

return False, f, {}

1125

return False, f, {}

1125

1126

return sidedatacompanion

1127

return sidedatacompanion

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import collections
             import multiprocessing
             import os
             from .i18n import _
             from .revlogutils.flagutil import REVIDX_SIDEDATA
             from . import (
                 error,
                 match as matchmod,
                 node,
                 pathutil,
                 pycompat,
                 util,
             )
             from .revlogutils import sidedata as sidedatamod
             from .utils import stringutil
             def _filter(src, dst, t):
                 """filters out invalid copies after chaining"""
                 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
                 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
                 # in the following table (not including trivial cases). For example, case 2
                 # is where a file existed in 'src' and remained under that name in 'mid' and
                 # then was renamed between 'mid' and 'dst'.
                 #
                 # case src mid dst result
                 #   1   x   y   -    -
                 #   2   x   y   y   x->y
                 #   3   x   y   x    -
                 #   4   x   y   z   x->z
                 #   5   -   x   y    -
                 #   6   x   x   y   x->y
                 #
                 # _chain() takes care of chaining the copies in 'a' and 'b', but it
                 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
                 # between 5 and 6, so it includes all cases in its result.
                 # Cases 1, 3, and 5 are then removed by _filter().
                 for k, v in list(t.items()):
                     # remove copies from files that didn't exist
                     if v not in src:
                         del t[k]
                     # remove criss-crossed copies
                     elif k in src and v in dst:
                         del t[k]
                     # remove copies to files that were then removed
                     elif k not in dst:
                         del t[k]
             def _chain(prefix, suffix):
                 """Compose the copy mappings 'prefix' and 'suffix' into a new dict

                 Every entry of `prefix` is carried over. Each `suffix` entry
                 dst -> mid is then recorded as dst -> prefix[mid] when `mid` is itself
                 a key of `prefix`, and as dst -> mid otherwise. Neither input is
                 modified.
                 """
                 chained = prefix.copy()
                 # lookups go through 'prefix' (not 'chained') so the outcome does not
                 # depend on the iteration order of 'suffix'
                 chained.update(
                     (dst, prefix.get(mid, mid))
                     for dst, mid in pycompat.iteritems(suffix)
                 )
                 return chained
             def _tracefile(fctx, am, basemf):
                 """return file context that is the ancestor of fctx present in ancestor
                 manifest am
                 Note: we used to try and stop after a given limit, however checking if that
                 limit is reached turned out to be very expensive. we are better off
                 disabling that feature."""
                 for f in fctx.ancestors():
                     path = f.path()
                     if am.get(path, None) == f.filenode():
                         return path
                     if basemf and basemf.get(path, None) == f.filenode():
                         return path
             def _dirstatecopies(repo, match=None):
                 ds = repo.dirstate
                 c = ds.copies().copy()
                 for k in list(c):
                     if ds[k] not in b'anm' or (match and not match(k)):
                         del c[k]
                 return c
             def _computeforwardmissing(a, b, match=None):
                 """Computes which files are in b but not a.
                 This is its own function so extensions can easily wrap this call to see what
                 files _forwardcopies is about to process.
                 """
                 ma = a.manifest()
                 mb = b.manifest()
                 return mb.filesnotin(ma, match=match)
             def usechangesetcentricalgo(repo):
                 """Tell whether the changeset-centric copy algorithms should be used

                 True when `repo.filecopiesmode` is b'changeset-sidedata', or when the
                 `experimental.copies.read-from` config value is b'changeset-only' or
                 b'compatibility'. False otherwise.
                 """
                 if repo.filecopiesmode != b'changeset-sidedata':
                     # fall back on the configuration
                     source = repo.ui.config(b'experimental', b'copies.read-from')
                     return source in (b'changeset-only', b'compatibility')
                 return True
             def _committedforwardcopies(a, b, base, match):
                 """Variant of _forwardcopies() for a committed `b`

                 `b.rev()` must not be None (i.e. `b` is not the working copy). When
                 the changeset-centric algorithms are enabled the work is handed over
                 to _changesetforwardcopies(); otherwise every file of `b` missing from
                 `a` is traced back through its file ancestors and the result is a
                 {file in b: path it was traced to} dict.
                 """
                 repo = a._repo
                 if usechangesetcentricalgo(repo):
                     return _changesetforwardcopies(a, b, match)

                 ui = repo.ui
                 debug = ui.debugflag and ui.configbool(b'devel', b'debug.copies')
                 dbg = ui.debug
                 if debug:
                     dbg(b'debug.copies:    looking into rename from %s to %s\n' % (a, b))

                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 am = a.manifest()
                 if base is None:
                     basemf = None
                 else:
                     basemf = base.manifest()

                 # We only look for where new files came from. Finding where old files
                 # went is too expensive, which means a case like
                 # 'hg rm b; hg cp a b' can be missed.
                 found = {}

                 # Comparing two entire manifests is costly when they are large. In the
                 # common situation where `a` is the only parent of `b` (rebase,
                 # histedit, ...) the search can be restricted to the files touched by
                 # `b`. Merges are excluded from this shortcut because ctx.files() of a
                 # merge is not correct for this comparison.
                 missingmatch = match
                 if b.p1() == a and b.p2().node() == node.nullid:
                     touched = matchmod.exact(b.files())
                     missingmatch = matchmod.intersectmatchers(match, touched)
                 missing = _computeforwardmissing(a, b, match=missingmatch)

                 ancestrycontext = repo.changelog.ancestors([b.rev()], inclusive=True)

                 if debug:
                     dbg(b'debug.copies:      missing files to search: %d\n' % len(missing))

                 for fname in sorted(missing):
                     if debug:
                         dbg(b'debug.copies:        tracing file: %s\n' % fname)
                     fctx = b[fname]
                     fctx._ancestrycontext = ancestrycontext

                     if debug:
                         started = util.timer()
                     origin = _tracefile(fctx, am, basemf)
                     if origin:
                         if debug:
                             dbg(b'debug.copies:          rename of: %s\n' % origin)
                         found[fname] = origin
                     if debug:
                         elapsed = util.timer() - started
                         dbg(b'debug.copies:          time: %f seconds\n' % elapsed)
                 return found
             def _revinfogetter(repo):
                 """return a function that return multiple data given a <rev>"i
                 * p1: revision number of first parent
                 * p2: revision number of first parent
                 * p1copies: mapping of copies from p1
                 * p2copies: mapping of copies from p2
                 * removed: a list of removed files
                 """
                 cl = repo.changelog
                 parents = cl.parentrevs
                 if repo.filecopiesmode == b'changeset-sidedata':
                     changelogrevision = cl.changelogrevision
                     flags = cl.flags
                     # A small cache to avoid doing the work twice for merges
                     #
                     # In the vast majority of cases, if we ask information for a revision
                     # about 1 parent, we'll later ask it for the other. So it make sense to
                     # keep the information around when reaching the first parent of a merge
                     # and dropping it after it was provided for the second parents.
                     #
                     # It exists cases were only one parent of the merge will be walked. It
                     # happens when the "destination" the copy tracing is descendant from a
                     # new root, not common with the "source". In that case, we will only walk
                     # through merge parents that are descendant of changesets common
                     # between "source" and "destination".
                     #
                     # With the current case implementation if such changesets have a copy
                     # information, we'll keep them in memory until the end of
                     # _changesetforwardcopies. We don't expect the case to be frequent
                     # enough to matters.
                     #
                     # In addition, it would be possible to reach pathological case, were
                     # many first parent are met before any second parent is reached. In
                     # that case the cache could grow. If this even become an issue one can
                     # safely introduce a maximum cache size. This would trade extra CPU/IO
                     # time to save memory.
                     merge_caches = {}
                     def revinfo(rev):
                         p1, p2 = parents(rev)
                         if flags(rev) & REVIDX_SIDEDATA:
                             e = merge_caches.pop(rev, None)
                             if e is not None:
                                 return e
                             c = changelogrevision(rev)
                             p1copies = c.p1copies
                             p2copies = c.p2copies
                             removed = c.filesremoved
                             if p1 != node.nullrev and p2 != node.nullrev:
                                 # XXX some case we over cache, IGNORE
                                 merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
                         else:
                             p1copies = {}
                             p2copies = {}
                             removed = []
                         return p1, p2, p1copies, p2copies, removed
                 else:
                     def revinfo(rev):
                         p1, p2 = parents(rev)
                         ctx = repo[rev]
                         p1copies, p2copies = ctx._copies
                         removed = ctx.filesremoved()
                         return p1, p2, p1copies, p2copies, removed
                 return revinfo
             def _changesetforwardcopies(a, b, match):
                 """Compute the copies from `a` to `b` using changeset level data

                 The revisions missing from `a` up to `b` are collected, a
                 {parent: [children]} mapping is built over them, and the revisions to
                 walk are handed to _combinechangesetcopies(). An empty dict is
                 returned when `a` is the null revision or `b` itself, or when no
                 revision outside the missing set is a parent of a missing one.
                 """
                 srcrev = a.rev()
                 dstrev = b.rev()
                 if srcrev in (node.nullrev, dstrev):
                     return {}

                 repo = a.repo().unfiltered()
                 revinfo = _revinfogetter(repo)
                 cl = repo.changelog

                 missingrevs = cl.findmissingrevs(common=[srcrev], heads=[dstrev])
                 missing = set(missingrevs)

                 children = {}
                 roots = set()
                 for rev in missingrevs:
                     for parent in cl.parentrevs(rev):
                         if parent == node.nullrev:
                             continue
                         children.setdefault(parent, []).append(rev)
                         if parent not in missing:
                             # a parent outside of the missing set is where the
                             # walk starts from
                             roots.add(parent)
                 if not roots:
                     # no common revision to track copies from
                     return {}

                 reachable = cl.reachableroots(
                     min(roots), [dstrev], list(roots), includepath=True
                 )
                 iterrevs = (set(reachable) & missing) | roots
                 # the destination is the target of the walk, not a step of it
                 iterrevs.remove(dstrev)
                 return _combinechangesetcopies(
                     sorted(iterrevs), children, dstrev, revinfo, match
                 )
             def _combinechangesetcopies(revs, children, targetrev, revinfo, match):
                 """combine the copies information for each item of iterrevs
                 revs: sorted iterable of revision to visit
                 children: a {parent: [children]} mapping.
                 targetrev: the final copies destination revision (not in iterrevs)
                 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
                 match: a matcher
                 It returns the aggregated copies information for `targetrev`.
                 """
                 all_copies = {}
                 alwaysmatch = match.always()
                 for r in revs:
                     copies = all_copies.pop(r, None)
                     if copies is None:
                         # this is a root
                         copies = {}
                     for i, c in enumerate(children[r]):
                         p1, p2, p1copies, p2copies, removed = revinfo(c)
                         if r == p1:
                             parent = 1
                             childcopies = p1copies
                         else:
                             assert r == p2
                             parent = 2
                             childcopies = p2copies
                         if not alwaysmatch:
                             childcopies = {
                                 dst: src for dst, src in childcopies.items() if match(dst)
                             }
                         newcopies = copies
                         if childcopies:
                             newcopies = _chain(newcopies, childcopies)
                             # _chain makes a copies, we can avoid doing so in some
                             # simple/linear cases.
                             assert newcopies is not copies
                         for f in removed:
                             if f in newcopies:
                                 if newcopies is copies:
                                     # copy on write to avoid affecting potential other
                                     # branches.  when there are no other branches, this
                                     # could be avoided.
                                     newcopies = copies.copy()
                                 del newcopies[f]
                         othercopies = all_copies.get(c)
                         if othercopies is None:
                             all_copies[c] = newcopies
                         else:
                             # we are the second parent to work on c, we need to merge our
                             # work with the other.
                             #
                             # Unlike when copies are stored in the filelog, we consider
                             # it a copy even if the destination already existed on the
                             # other branch. It's simply too expensive to check if the
                             # file existed in the manifest.
                             #
                             # In case of conflict, parent 1 take precedence over parent 2.
                             # This is an arbitrary choice made anew when implementing
                             # changeset based copies. It was made without regards with
                             # potential filelog related behavior.
                             if parent == 1:
                                 othercopies.update(newcopies)
                             else:
                                 newcopies.update(othercopies)
                                 all_copies[c] = newcopies
                 return all_copies[targetrev]
             def _forwardcopies(a, b, base=None, match=None):
                 """Return the {dst@b: src@a} copy mapping, `a` being an ancestor of `b`

                 `base` defaults to `a`. `match` is first passed through the
                 repository's narrowmatch(). When `b` is the working copy
                 (`b.rev()` is None), the copies up to its first parent are chained
                 with the ones recorded in the dirstate.
                 """
                 if base is None:
                     base = a
                 match = a.repo().narrowmatch(match)
                 if b.rev() is not None:
                     return _committedforwardcopies(a, b, base, match)
                 # working copy: combine committed copies with the dirstate ones
                 committed = _committedforwardcopies(a, b.p1(), base, match)
                 return _chain(committed, _dirstatecopies(b._repo, match))
             def _backwardrenames(a, b, match):
                 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
                     return {}
                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
                 # We don't want to pass in "match" here, since that would filter
                 # the destination by it. Since we're reversing the copies, we want
                 # to filter the source instead.
                 f = _forwardcopies(b, a)
                 r = {}
                 for k, v in sorted(pycompat.iteritems(f)):
                     if match and not match(v):
                         continue
                     # remove copies
                     if v in a:
                         continue
                     r[v] = k
                 return r
             def pathcopies(x, y, match=None):
                 """Return the {dst@y: src@x} copy mapping for a directed compare

                 Depending on how `x` and `y` relate through `y.ancestor(x)`, copies
                 are searched forward, backward, or backward then forward through that
                 ancestor. The result goes through _filter() before being returned. An
                 empty dict is returned when `x == y` or when either context is falsy.
                 """
                 repo = x._repo
                 ui = repo.ui
                 debug = ui.debugflag and ui.configbool(b'devel', b'debug.copies')
                 if debug:
                     ui.debug(b'debug.copies: searching copies from %s to %s\n' % (x, y))
                 if x == y or not x or not y:
                     return {}

                 ancestor = y.ancestor(x)
                 if ancestor == x:
                     if debug:
                         ui.debug(b'debug.copies: search mode: forward\n')
                     if y.rev() is None and x == y.p1():
                         # short-circuit to avoid issues with merge states
                         return _dirstatecopies(repo, match)
                     copies = _forwardcopies(x, y, match=match)
                 elif ancestor == y:
                     if debug:
                         ui.debug(b'debug.copies: search mode: backward\n')
                     copies = _backwardrenames(x, y, match=match)
                 else:
                     if debug:
                         ui.debug(b'debug.copies: search mode: combined\n')
                     if ancestor.rev() != node.nullrev:
                         base = x
                     else:
                         base = None
                     backward = _backwardrenames(x, ancestor, match=match)
                     forward = _forwardcopies(ancestor, y, base, match=match)
                     copies = _chain(backward, forward)
                 _filter(x, y, copies)
                 return copies
             def mergecopies(repo, c1, c2, base):
                 """
                 Find the moves and copies between contexts c1 and c2 that matter for
                 a merge, with 'base' acting as the merge base.

                 Commands such as rebase, merge or unshelve rely on copytracing to
                 merge files that were moved/copied on one side and modified on the
                 other. For example:

                 o          ---> 4 another commit
                 |
                 |   o      ---> 3 commit that modifies a.txt
                 |  /
                 o /        ---> 2 commit that moves a.txt to b.txt
                 |/
                 o          ---> 1 merge base

                 When rebasing revision 3 onto revision 4, a.txt does not exist in
                 revision 4. With copytrace disabled the user then gets the message:

                 ```other changed <file> which local deleted```

                 Five dicts are returned: "copy", "movewithdir", "diverge",
                 "renamedelete" and "dirmove".

                 "copy" maps destination name -> source name, the source being in c1
                 and the destination in c2, or vice-versa.

                 "movewithdir" maps source name -> destination name for files present
                 at source in one context but not the other, which the merge must move
                 to destination because the other context moved their directory.

                 "diverge" maps source name -> list of destination names for divergent
                 renames.

                 "renamedelete" maps source name -> list of destination names for
                 files deleted in c1 that were renamed in c2, or vice-versa.

                 "dirmove" maps detected source dir -> destination dir renames. It is
                 needed to handle changes to new files previously grafted into renamed
                 directories.

                 The copytracing algorithm that runs depends on the configuration.
                 """
                 # avoid silly behavior for update from empty dir
                 if not (c1 and c2) or c1 == c2:
                     return {}, {}, {}, {}, {}

                 narrowmatch = c1.repo().narrowmatch()

                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

                 # Disabling copy tracing is deliberately checked after the
                 # node == p1 logic above, which is required for a simple copy to
                 # be kept across a rebase.
                 copytracing = repo.ui.config(b'experimental', b'copytrace')
                 if stringutil.parsebool(copytracing) is False:
                     # stringutil.parsebool() returns None for a value it cannot parse,
                     # so copytracing stays on in such cases
                     return {}, {}, {}, {}, {}

                 # Full copytracing is used when:
                 # - changeset-centric algorithms are needed (the heuristics make no
                 #   sense there),
                 # - the heuristics were not asked for,
                 # - only non-public revisions are involved: it is then fast enough
                 #   and also covers copies the heuristics could miss.
                 if (
                     usechangesetcentricalgo(repo)
                     or copytracing != b'heuristics'
                     or _isfullcopytraceable(repo, c1, base)
                 ):
                     return _fullcopytracing(repo, c1, c2, base)
                 return _heuristicscopytracing(repo, c1, c2, base)
             def _isfullcopytraceable(repo, c1, base):
                 """ Checks that if base, source and destination are all no-public branches,
                 if yes let's use the full copytrace algorithm for increased capabilities
                 since it will be fast enough.
                 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
                 number of changesets from c1 to base such that if number of changesets are
                 more than the limit, full copytracing algorithm won't be used.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c1.mutable() and base.mutable():
                     sourcecommitlimit = repo.ui.configint(
                         b'experimental', b'copytrace.sourcecommitlimit'
                     )
                     commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
                     return commits < sourcecommitlimit
                 return False
             def _checksinglesidecopies(
                 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
             ):
                 if src not in m2:
                     # deleted on side 2
                     if src not in m1:
                         # renamed on side 1, deleted on side 2
                         renamedelete[src] = dsts1
                 elif m2[src] != mb[src]:
                     if not _related(c2[src], base[src]):
                         return
                     # modified on side 2
                     for dst in dsts1:
                         if dst not in m2:
                             # dst not added on side 2 (handle as regular
                             # "both created" case in manifestmerge otherwise)
                             copy[dst] = src
             def _fullcopytracing(repo, c1, c2, base):
                 """Full copytracing between ``c1`` and ``c2`` relative to ``base``.

                 Finds all the new files that were added from the merge base up to the
                 top commit and, for each of them, checks whether it was copied from
                 another file.

                 This is pretty slow when a lot of changesets are involved but will track
                 all the copies.

                 Returns a 5-tuple ``(copy, movewithdir, diverge, renamedelete, dirmove)``.
                 """
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 mb = base.manifest()

                 copies1 = pathcopies(base, c1)
                 copies2 = pathcopies(base, c2)

                 if not (copies1 or copies2):
                     return {}, {}, {}, {}, {}

                 # invert the {dst: src} mappings into {src: [dst, ...]}
                 inversecopies1 = {}
                 inversecopies2 = {}
                 for dst, src in copies1.items():
                     inversecopies1.setdefault(src, []).append(dst)
                 for dst, src in copies2.items():
                     inversecopies2.setdefault(src, []).append(dst)

                 copy = {}
                 diverge = {}
                 renamedelete = {}
                 allsources = set(inversecopies1) | set(inversecopies2)
                 for src in allsources:
                     dsts1 = inversecopies1.get(src)
                     dsts2 = inversecopies2.get(src)
                     if dsts1 and dsts2:
                         # copied/renamed on both sides
                         if src not in m1 and src not in m2:
                             # renamed on both sides
                             dsts1 = set(dsts1)
                             dsts2 = set(dsts2)
                             # If there's some overlap in the rename destinations, we
                             # consider it not divergent. For example, if side 1 copies 'a'
                             # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                             # and 'd' and deletes 'a'.
                             if dsts1 & dsts2:
                                 for dst in dsts1 & dsts2:
                                     copy[dst] = src
                             else:
                                 diverge[src] = sorted(dsts1 | dsts2)
                         elif src in m1 and src in m2:
                             # copied on both sides
                             dsts1 = set(dsts1)
                             dsts2 = set(dsts2)
                             for dst in dsts1 & dsts2:
                                 copy[dst] = src
                         # TODO: Handle cases where it was renamed on one side and copied
                         # on the other side
                     elif dsts1:
                         # copied/renamed only on side 1
                         _checksinglesidecopies(
                             src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
                         )
                     elif dsts2:
                         # copied/renamed only on side 2
                         _checksinglesidecopies(
                             src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
                         )

                 # find interesting file sets from manifests
                 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
                 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
                 u1 = sorted(addedinm1 - addedinm2)
                 u2 = sorted(addedinm2 - addedinm1)

                 header = b"  unmatched files in %s"
                 if u1:
                     repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
                 if u2:
                     repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

                 fullcopy = copies1.copy()
                 fullcopy.update(copies2)

                 if repo.ui.debugflag:
                     # These flattened sets only serve the debug listing below, so they
                     # are built only when debug output is enabled.
                     renamedeleteset = set()
                     divergeset = set()
                     for dsts in diverge.values():
                         divergeset.update(dsts)
                     for dsts in renamedelete.values():
                         renamedeleteset.update(dsts)

                     repo.ui.debug(
                         b"  all copies found (* = to merge, ! = divergent, "
                         b"% = renamed and deleted):\n"
                     )
                     for f in sorted(fullcopy):
                         note = b""
                         if f in copy:
                             note += b"*"
                         if f in divergeset:
                             note += b"!"
                         if f in renamedeleteset:
                             note += b"%"
                         repo.ui.debug(
                             b"   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
                         )
                     del renamedeleteset
                     del divergeset

                 repo.ui.debug(b"  checking for directory renames\n")

                 # generate a directory move map
                 d1, d2 = c1.dirs(), c2.dirs()
                 invalid = set()
                 dirmove = {}

                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in pycompat.iteritems(fullcopy):
                     dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif dsrc in d1 and ddst in d1:
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc)
                     elif dsrc in d2 and ddst in d2:
                         # directory wasn't entirely moved remotely
                         invalid.add(dsrc)
                     elif dsrc in dirmove and dirmove[dsrc] != ddst:
                         # files from the same directory moved to two different places
                         invalid.add(dsrc)
                     else:
                         # looks good so far
                         dirmove[dsrc] = ddst

                 # a directory may have been recorded before it was found invalid
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del d1, d2, invalid

                 if not dirmove:
                     return copy, {}, diverge, renamedelete, {}

                 # trailing slashes make the prefix test below match whole directories
                 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

                 for d in dirmove:
                     repo.ui.debug(
                         b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
                     )

                 movewithdir = {}
                 # check unaccounted nonoverlapping files against directory moves
                 for f in u1 + u2:
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d) :]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(
                                         b"   pending file src: '%s' -> dst: '%s'\n"
                                         % (f, df)
                                     )
                                 break

                 return copy, movewithdir, diverge, renamedelete, dirmove
             def _heuristicscopytracing(repo, c1, c2, base):
                 """Fast copytracing using filename heuristics.

                 Assumes that moves or renames are of following two types:

                 1) Inside a directory only (same directory name but different filenames)
                 2) Move from one directory to another
                    (same filenames but different directory names)

                 Works only when there are no merge commits in the "source branch".
                 Source branch is commits from base up to c2 not including base.

                 If merge is involved it fallbacks to _fullcopytracing().

                 Can be used by setting the following config:

                     [experimental]
                     copytrace = heuristics

                 In some cases the copy/move candidates found by heuristics can be very large
                 in number and that will make the algorithm slow. The number of possible
                 candidates to check can be limited by using the config
                 `experimental.copytrace.movecandidateslimit` which defaults to 100.

                 Returns a 5-tuple whose first item is the ``{dst: src}`` copies mapping;
                 the remaining four items are always empty dicts on the heuristic path.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c2.rev() is None:
                     c2 = c2.p1()

                 copies = {}
                 changedfiles = set()
                 m1 = c1.manifest()
                 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
                     # If base is not in c2 branch, we switch to fullcopytracing
                     repo.ui.debug(
                         b"switching to full copytracing as base is not "
                         b"an ancestor of c2\n"
                     )
                     return _fullcopytracing(repo, c1, c2, base)

                 # walk first parents from c2 down to base, collecting touched files
                 ctx = c2
                 while ctx != base:
                     if len(ctx.parents()) == 2:
                         # To keep things simple let's not handle merges
                         repo.ui.debug(b"switching to full copytracing because of merges\n")
                         return _fullcopytracing(repo, c1, c2, base)
                     changedfiles.update(ctx.files())
                     ctx = ctx.p1()

                 cp = _forwardcopies(base, c2)
                 for dst, src in pycompat.iteritems(cp):
                     if src in m1:
                         copies[dst] = src

                 # file is missing if it isn't present in the destination, but is present in
                 # the base and present in the source.
                 # Presence in the base is important to exclude added files, presence in the
                 # source is important to exclude removed files.
                 missingfiles = [
                     f for f in changedfiles if f not in m1 and f in base and f in c2
                 ]

                 if missingfiles:
                     # we can have a lot of candidates which can slow down the heuristics
                     # config value to limit the number of candidates moves to check
                     # (the value does not depend on the file, so read it only once)
                     maxcandidates = repo.ui.configint(
                         b'experimental', b'copytrace.movecandidateslimit'
                     )

                     # index files added in c1 by basename and by directory name
                     basenametofilename = collections.defaultdict(list)
                     dirnametofilename = collections.defaultdict(list)

                     for f in m1.filesnotin(base.manifest()):
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         basenametofilename[basename].append(f)
                         dirnametofilename[dirname].append(f)

                     for f in missingfiles:
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         samebasename = basenametofilename[basename]
                         samedirname = dirnametofilename[dirname]
                         movecandidates = samebasename + samedirname
                         # f is guaranteed to be present in c2, that's why
                         # c2.filectx(f) won't fail
                         f2 = c2.filectx(f)

                         if len(movecandidates) > maxcandidates:
                             repo.ui.status(
                                 _(
                                     b"skipping copytracing for '%s', more "
                                     b"candidates than the limit: %d\n"
                                 )
                                 % (f, len(movecandidates))
                             )
                             continue

                         for candidate in movecandidates:
                             f1 = c1.filectx(candidate)
                             if _related(f1, f2):
                                 # if there are a few related copies then we'll merge
                                 # changes into all of them. This matches the behaviour
                                 # of upstream copytracing
                                 copies[candidate] = f

                 return copies, {}, {}, {}, {}
             def _related(f1, f2):
                 """return True if f1 and f2 filectx have a common ancestor
                 Walk back to common ancestor to see if the two files originate
                 from the same file. Since workingfilectx's rev() is None it messes
                 up the integer comparison logic, hence the pre-step check for
                 None (f1 and f2 can only be workingfilectx's initially).
                 """
                 if f1 == f2:
                     return True  # a match
                 g1, g2 = f1.ancestors(), f2.ancestors()
                 try:
                     f1r, f2r = f1.linkrev(), f2.linkrev()
                     if f1r is None:
                         f1 = next(g1)
                     if f2r is None:
                         f2 = next(g2)
                     while True:
                         f1r, f2r = f1.linkrev(), f2.linkrev()
                         if f1r > f2r:
                             f1 = next(g1)
                         elif f2r > f1r:
                             f2 = next(g2)
                         else:  # f1 and f2 point to files in the same linkrev
                             return f1 == f2  # true if they point to the same file
                 except StopIteration:
                     return False
             def graftcopies(wctx, ctx, base):
                 """Reproduce in ``wctx`` the copies made between ``base`` and ``ctx``.

                 Differences with mergecopies():

                 * only copies between base and ctx are considered; copies between base
                   and wctx are ignored;
                 * the copies are applied to the working copy instead of merely being
                   returned.

                 That makes it cheaper (especially in the common case of
                 base==ctx.p1()) and useful also when experimental.copytrace=off.

                 merge.update() will have already marked most copies, but it will only
                 mark copies if it thinks the source files are related (see
                 merge._related()). It will also not mark copies if the file wasn't
                 modified on the local side. This function adds the copies that were
                 "missed" by merge.update().
                 """
                 pending = pathcopies(base, ctx)
                 # filter out the copies that are invalid between wctx's parent and wctx
                 _filter(wctx.p1(), wctx, pending)
                 for target, origin in pycompat.iteritems(pending):
                     wctx[target].markcopied(origin)
             def computechangesetfilesadded(ctx):
                 """Return the list of files added in a changeset.

                 A file counts as added when it is listed by ``ctx.files()`` and is
                 contained in none of ``ctx.parents()``.
                 """
                 return [
                     name
                     for name in ctx.files()
                     if not any(name in parent for parent in ctx.parents())
                 ]
             def computechangesetfilesremoved(ctx):
                 """Return the list of files removed in a changeset.

                 A file counts as removed when it is listed by ``ctx.files()`` but is not
                 contained in ``ctx`` itself.
                 """
                 return [name for name in ctx.files() if name not in ctx]
             def computechangesetcopies(ctx):
                 """Return the copies data for a changeset.

                 The result is a pair of dictionaries ``(p1copies, p2copies)``, each of
                 the form ``{newname: oldname}``. A copy is attributed to the first parent
                 (p1 is tried before p2) that contains the copy source with the recorded
                 file node. Files outside the narrowspec or absent from ``ctx`` are
                 ignored.
                 """
                 p1copies = {}
                 p2copies = {}
                 p1 = ctx.p1()
                 p2 = ctx.p2()
                 narrowmatch = ctx._repo.narrowmatch()
                 candidates = ((p1, p1copies), (p2, p2copies))
                 for dst in ctx.files():
                     if narrowmatch(dst) and dst in ctx:
                         copied = ctx[dst].renamed()
                         if copied:
                             src, srcnode = copied
                             for parent, found in candidates:
                                 if src in parent and parent[src].filenode() == srcnode:
                                     found[dst] = src
                                     break
                 return p1copies, p2copies
             def encodecopies(files, copies):
                 """Serialize a ``{dst: src}`` copies mapping relative to ``files``.

                 Each copy becomes one ``<index>\\0<src>`` entry, where ``<index>`` is the
                 position of the destination in ``files``; entries are joined with
                 newlines. Raises ProgrammingError when the number of encoded entries
                 differs from the number of copies.
                 """
                 items = [
                     b'%d\0%s' % (pos, copies[name])
                     for pos, name in enumerate(files)
                     if name in copies
                 ]
                 if len(items) != len(copies):
                     raise error.ProgrammingError(
                         b'some copy targets missing from file list'
                     )
                 return b"\n".join(items)
             def decodecopies(files, data):
                 """Decode copies data produced by encodecopies().

                 ``data`` is a newline-separated list of ``<index>\\0<src>`` entries where
                 ``<index>`` is the position of the copy destination in ``files``.

                 Returns a ``{dst: src}`` dict (empty when ``data`` is empty), or None
                 when ``data`` cannot be decoded, including when an index falls outside
                 ``files``.
                 """
                 try:
                     copies = {}
                     if not data:
                         return copies
                     for l in data.split(b'\n'):
                         strindex, src = l.split(b'\0')
                         i = int(strindex)
                         if i < 0:
                             # A negative index would silently wrap around to the end
                             # of `files` instead of raising IndexError; treat it as
                             # malformed data, like decodefileindices() does.
                             return None
                         dst = files[i]
                         copies[dst] = src
                     return copies
                 except (ValueError, IndexError):
                     # Perhaps someone had chosen the same key name (e.g. "p1copies") and
                     # used different syntax for the value.
                     return None
             def encodefileindices(files, subset):
                 """Encode ``subset`` as newline-separated indices into ``files``.

                 The indices appear in the order of ``files``; members of ``subset`` that
                 are not in ``files`` are silently left out.
                 """
                 wanted = set(subset)
                 return b'\n'.join(
                     b'%d' % pos for pos, name in enumerate(files) if name in wanted
                 )
             def decodefileindices(files, data):
                 """Decode newline-separated indices into the matching entries of ``files``.

                 Returns the list of selected files (empty when ``data`` is empty), or
                 None when ``data`` is malformed or holds an out-of-range index.
                 """
                 if not data:
                     return []
                 picked = []
                 try:
                     for field in data.split(b'\n'):
                         pos = int(field)
                         if not 0 <= pos < len(files):
                             return None
                         picked.append(files[pos])
                 except (ValueError, IndexError):
                     # Perhaps someone had chosen the same key name (e.g. "added") and
                     # used different syntax for the value.
                     return None
                 return picked
             def _getsidedata(srcrepo, rev):
                 """Compute the copy-related sidedata map for revision ``rev``.

                 The returned dict may hold the SD_P1COPIES, SD_P2COPIES, SD_FILESADDED
                 and SD_FILESREMOVED keys; a key is only present when its encoded value
                 is non-empty.
                 """
                 ctx = srcrepo[rev]
                 p1copies, p2copies = computechangesetcopies(ctx)
                 filesadded = computechangesetfilesadded(ctx)
                 filesremoved = computechangesetfilesremoved(ctx)
                 sidedata = {}
                 # Test each collection individually: the (p1copies, p2copies) pair is a
                 # non-empty tuple and therefore always true, even when both dicts are
                 # empty.
                 if p1copies or p2copies or filesadded or filesremoved:
                     sortedfiles = sorted(ctx.files())
                     p1copies = encodecopies(sortedfiles, p1copies)
                     p2copies = encodecopies(sortedfiles, p2copies)
                     filesadded = encodefileindices(sortedfiles, filesadded)
                     filesremoved = encodefileindices(sortedfiles, filesremoved)
                     if p1copies:
                         sidedata[sidedatamod.SD_P1COPIES] = p1copies
                     if p2copies:
                         sidedata[sidedatamod.SD_P2COPIES] = p2copies
                     if filesadded:
                         sidedata[sidedatamod.SD_FILESADDED] = filesadded
                     if filesremoved:
                         sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
                 return sidedata
             def getsidedataadder(srcrepo, destrepo):
                 """Pick the sidedata companion factory to use.

                 The multi-process version is used only when the
                 ``experimental.worker.repository-upgrade`` config is enabled and the
                 platform is not Windows; otherwise the simple in-process version is
                 used.
                 """
                 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
                 if not pycompat.iswindows and use_w:
                     return _get_worker_sidedata_adder(srcrepo, destrepo)
                 return _get_simple_sidedata_adder(srcrepo, destrepo)
             def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
                 """The function used by worker precomputing sidedata
                 It read an input queue containing revision numbers
                 It write in an output queue containing (rev, <sidedata-map>)
                 The `None` input value is used as a stop signal.
                 The `tokens` semaphore is user to avoid having too many unprocessed
                 entries. The workers needs to acquire one token before fetching a task.
                 They will be released by the consumer of the produced data.
                 """
                 tokens.acquire()
                 rev = revs_queue.get()
                 while rev is not None:
                     data = _getsidedata(srcrepo, rev)
                     sidedata_queue.put((rev, data))
                     tokens.acquire()
                     rev = revs_queue.get()
                 # processing of `None` is completed, release the token.
                 tokens.release()
             # Number of semaphore tokens allotted per worker process: the token
             # semaphore in _get_worker_sidedata_adder() is created with
             # `nbworkers * BUFF_PER_WORKER` slots.
             BUFF_PER_WORKER = 50
             def _get_worker_sidedata_adder(srcrepo, destrepo):
                 """The parallel version of the sidedata computation

                 This code spawns a pool of worker processes that precompute a buffer of
                 sidedata before we actually need them.

                 Returns a ``sidedata_companion(revlog, rev)`` callable. For a changelog
                 it returns the sidedata map computed by the workers for ``rev``; for any
                 other revlog it returns an empty map.
                 """
                 # avoid circular import copies -> scmutil -> worker -> copies
                 from . import worker
                 nbworkers = worker._numworkers(srcrepo.ui)
                 # bounds the number of tasks fetched by workers but not yet consumed
                 tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
                 revsq = multiprocessing.Queue()
                 sidedataq = multiprocessing.Queue()
                 # NOTE(review): presumably guarantees that changelog.revs() below covers
                 # every revision (unfiltered repository) -- confirm
                 assert srcrepo.filtername is None
                 # queue all tasks beforehand, revision numbers are small and it makes
                 # synchronisation simpler
                 #
                 # Since the computation for each node can be quite expensive, the overhead
                 # of using a single queue is not relevant. In practice, most computations
                 # are fast but some are very expensive and dominate all the other smaller
                 # costs.
                 for r in srcrepo.changelog.revs():
                     revsq.put(r)
                 # queue the "no more tasks" markers (one per worker)
                 for i in range(nbworkers):
                     revsq.put(None)
                 allworkers = []
                 for i in range(nbworkers):
                     args = (srcrepo, revsq, sidedataq, tokens)
                     w = multiprocessing.Process(target=_sidedata_worker, args=args)
                     allworkers.append(w)
                     w.start()
                 # dictionary to store results for revisions other than the one we are
                 # looking for. For example, if we need the sidedatamap for 42, and 43 is
                 # received, we shelve 43 for later use.
                 staging = {}
                 def sidedata_companion(revlog, rev):
                     sidedata = {}
                     if util.safehasattr(revlog, b'filteredrevs'):  # this is a changelog
                         # Is the data previously shelved ?
                         sidedata = staging.pop(rev, None)
                         if sidedata is None:
                             # look at the queued results until we find the one we are
                             # looking for (shelve the other ones)
                             r, sidedata = sidedataq.get()
                             while r != rev:
                                 staging[r] = sidedata
                                 r, sidedata = sidedataq.get()
                         # one entry has been consumed: let a worker fetch a new task
                         tokens.release()
                     return False, (), sidedata
                 return sidedata_companion
             def _get_simple_sidedata_adder(srcrepo, destrepo):
                 """The simple version of the sidedata computation
                 It just compute it in the same thread on request"""
                 def sidedatacompanion(revlog, rev):
                     sidedata = {}
                     if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
                         sidedata = _getsidedata(srcrepo, rev)
                     return False, (), sidedata
                 return sidedatacompanion
             def getsidedataremover(srcrepo, destrepo):
                 """Build a companion that asks for the copy-related sidedata to be dropped.

                 For a changelog revision flagged with REVIDX_SIDEDATA, the second item of
                 the returned tuple lists the four copy-related sidedata keys; in every
                 other case it is an empty tuple. The third item is always an empty dict.
                 """

                 def sidedatacompanion(revlog, rev):
                     # only a changelog has a `filteredrevs` attribute
                     if not util.safehasattr(revlog, 'filteredrevs'):
                         return False, (), {}
                     if not (revlog.flags(rev) & REVIDX_SIDEDATA):
                         return False, (), {}
                     keys = (
                         sidedatamod.SD_P1COPIES,
                         sidedatamod.SD_P2COPIES,
                         sidedatamod.SD_FILESADDED,
                         sidedatamod.SD_FILESREMOVED,
                     )
                     return False, keys, {}

                 return sidedatacompanion