@@ -1,1111 +1,1125 b''
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import multiprocessing
import os

from .i18n import _


from .revlogutils.flagutil import REVIDX_SIDEDATA

from . import (
    error,
    match as matchmod,
    node,
    pathutil,
    pycompat,
    util,
)

from .revlogutils import sidedata as sidedatamod

from .utils import stringutil

def _filter(src, dst, t):
    """filters out invalid copies after chaining"""

    # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
    # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
    # in the following table (not including trivial cases). For example, case 2
    # is where a file existed in 'src' and remained under that name in 'mid' and
    # then was renamed between 'mid' and 'dst'.
    #
    # case  src  mid  dst  result
    #   1    x    y    -     -
    #   2    x    y    y    x->y
    #   3    x    y    x     -
    #   4    x    y    z    x->z
    #   5    -    x    y     -
    #   6    x    x    y    x->y
    #
    # _chain() takes care of chaining the copies in 'a' and 'b', but it
    # cannot tell the difference between cases 1 and 2, between 3 and 4, or
    # between 5 and 6, so it includes all cases in its result.
    # Cases 1, 3, and 5 are then removed by _filter().

    for k, v in list(t.items()):
        # remove copies from files that didn't exist
        if v not in src:
            del t[k]
        # remove criss-crossed copies
        elif k in src and v in dst:
            del t[k]
        # remove copies to files that were then removed
        elif k not in dst:
            del t[k]


def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    result = prefix.copy()
    for key, value in pycompat.iteritems(suffix):
        result[key] = prefix.get(value, value)
    return result
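# Illustrative sketch (not part of the original module; the paths used are
# hypothetical): with plain dicts as copy maps, chaining and then filtering
# behaves roughly like this, using bytes paths as elsewhere in this file:
#
#   prefix = {b'b': b'a'}   # 'a' was copied to 'b' between src and mid
#   suffix = {b'c': b'b'}   # 'b' was copied to 'c' between mid and dst
#   _chain(prefix, suffix)  # -> {b'b': b'a', b'c': b'a'}
#
# _filter(src, dst, t) then drops entries whose source is not in 'src', whose
# destination is not in 'dst', or that criss-cross (destination already in
# 'src' while the source still exists in 'dst').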


def _tracefile(fctx, am, basemf):
    """return the path of the ancestor of fctx that is present in ancestor
    manifest am

    Note: we used to try and stop after a given limit; however, checking if
    that limit is reached turned out to be very expensive. We are better off
    disabling that feature."""

    for f in fctx.ancestors():
        path = f.path()
        if am.get(path, None) == f.filenode():
            return path
        if basemf and basemf.get(path, None) == f.filenode():
            return path


def _dirstatecopies(repo, match=None):
    ds = repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in b'anm' or (match and not match(k)):
            del c[k]
    return c


def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)


def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource
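# Hedged illustration (not in the original module): the function above reads
# the 'experimental.copies.read-from' setting, so a repository opting in to
# changeset-based copy data would carry an hgrc section along these lines:
#
#   [experimental]
#   copies.read-from = compatibility
#
# 'changeset-only' and 'compatibility' are the two values treated above as
# changeset-centric sources.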


def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm


def _revinfogetter(repo):
    """return a function that returns multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    """
    cl = repo.changelog
    parents = cl.parentrevs

    if repo.filecopiesmode == b'changeset-sidedata':
        changelogrevision = cl.changelogrevision
        flags = cl.flags

        # A small cache to avoid doing the work twice for merges
        #
        # In the vast majority of cases, if we ask information for a revision
        # about 1 parent, we'll later ask for the other. So it makes sense to
        # keep the information around when reaching the first parent of a merge
        # and to drop it after it has been provided for the second parent.
        #
        # There are cases where only one parent of the merge will be walked. It
        # happens when the "destination" of the copy tracing is a descendant of
        # a new root, not common with the "source". In that case, we will only
        # walk through merge parents that are descendants of changesets common
        # between "source" and "destination".
        #
        # With the current implementation, if such changesets have copy
        # information, we'll keep it in memory until the end of
        # _changesetforwardcopies. We don't expect the case to be frequent
        # enough to matter.
        #
        # In addition, it would be possible to reach a pathological case where
        # many first parents are met before any second parent is reached. In
        # that case the cache could grow. If this ever becomes an issue, one can
        # safely introduce a maximum cache size. This would trade extra CPU/IO
        # time to save memory.
        merge_caches = {}

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & REVIDX_SIDEDATA:
                e = merge_caches.pop(rev, None)
                if e is not None:
                    return e
                c = changelogrevision(rev)
                p1copies = c.p1copies
                p2copies = c.p2copies
                removed = c.filesremoved
                if p1 != node.nullrev and p2 != node.nullrev:
                    # XXX in some cases we over-cache; ignored for now
                    merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
            else:
                p1copies = {}
                p2copies = {}
                removed = []
            return p1, p2, p1copies, p2copies, removed

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            ctx = repo[rev]
            p1copies, p2copies = ctx._copies
            removed = ctx.filesremoved()
            return p1, p2, p1copies, p2copies, removed

    return revinfo
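# Hedged illustration (not in the original module; values are hypothetical):
# for a merge revision, the callable built above yields a 5-tuple along these
# lines:
#
#   p1, p2, p1copies, p2copies, removed = revinfo(rev)
#   # e.g. (10, 12, {b'new': b'old'}, {}, [b'gone'])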


def _changesetforwardcopies(a, b, match):
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)
    return _combinechangesetcopies(revs, children, b.rev(), revinfo, match)


def _combinechangesetcopies(revs, children, targetrev, revinfo, match):
    """combine the copies information for each item of `revs`

    revs: sorted iterable of revisions to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in `revs`)
    revinfo(rev): a function that returns (p1, p2, p1copies, p2copies, removed)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
                # _chain makes a copy; we could avoid doing so in some
                # simple/linear cases.
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. When there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 takes precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regard to
                # potential filelog-related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    return all_copies[targetrev]
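# Hedged sketch of the parent-1 precedence above (file names are hypothetical):
# if parent 1 contributed {b'renamed': b'old-a'} and parent 2 contributed
# {b'renamed': b'old-b'} for the same merge child, the merged result keeps
# parent 1's entry, i.e. {b'renamed': b'old-a'}.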


def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies


def _backwardrenames(a, b, match):
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r


def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
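# Usage sketch (an assumption for illustration, not an upstream doctest): with
# an open localrepository 'repo', the renames and copies recorded against the
# working copy's parent could be queried roughly like this:
#
#   wctx = repo[None]
#   renames = pathcopies(wctx.p1(), wctx)   # {destination: source}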


def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc. to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

     o          ---> 4 another commit
     |
     |   o      ---> 3 commit that modifies a.txt
     |  /
     o /        ---> 2 commit that moves a.txt to b.txt
     |/
     o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source, present in one context but not the other,
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on in such
        # cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
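# Hedged note (not in the original module): as read by the function above, the
# 'experimental.copytrace' setting selects the strategy, e.g.:
#
#   [experimental]
#   copytrace = heuristics
#
# A false-ish value disables copy tracing for merges, 'heuristics' uses the
# filename-based tracer below, and anything else falls through to
# _fullcopytracing().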


def _isfullcopytraceable(repo, c1, base):
    """ Checks whether base, source and destination are all non-public;
    if so, use the full copytrace algorithm for increased capabilities,
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit on
    the number of changesets from c1 to base; if the number of changesets is
    more than the limit, the full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint(
            b'experimental', b'copytrace.sourcecommitlimit'
        )
        commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False
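# Hedged illustration (not in the original module; 200 is an arbitrary example
# value, not the shipped default): the limit consulted above can be tuned from
# the configuration, for example:
#
#   [experimental]
#   copytrace.sourcecommitlimit = 200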


def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif m2[src] != mb[src]:
        if not _related(c2[src], base[src]):
            return
        # modified on side 2
        for dst in dsts1:
            if dst not in m2:
                # dst not added on side 2 (handle as regular
                # "both created" case in manifestmerge otherwise)
                copy[dst] = src


def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b"   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                        break

    return copy, movewithdir, diverge, renamedelete, dirmove
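# Hedged example of the directory-move output above (paths are hypothetical):
# if every tracked copy maps 'docs/<name>' to 'manual/<name>', dirmove ends up
# as {b'docs/': b'manual/'}, and a file newly added under 'docs/' on the other
# side is proposed for 'manual/' via movewithdir.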


def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If a merge is involved it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
|
790 | for f in missingfiles: | |
791 | basename = os.path.basename(f) |
|
791 | basename = os.path.basename(f) | |
792 | dirname = os.path.dirname(f) |
|
792 | dirname = os.path.dirname(f) | |
793 | samebasename = basenametofilename[basename] |
|
793 | samebasename = basenametofilename[basename] | |
794 | samedirname = dirnametofilename[dirname] |
|
794 | samedirname = dirnametofilename[dirname] | |
795 | movecandidates = samebasename + samedirname |
|
795 | movecandidates = samebasename + samedirname | |
796 | # f is guaranteed to be present in c2, that's why |
|
796 | # f is guaranteed to be present in c2, that's why | |
797 | # c2.filectx(f) won't fail |
|
797 | # c2.filectx(f) won't fail | |
798 | f2 = c2.filectx(f) |
|
798 | f2 = c2.filectx(f) | |
799 | # we can have a lot of candidates which can slow down the heuristics |
|
799 | # we can have a lot of candidates which can slow down the heuristics | |
800 | # config value to limit the number of candidate moves to check |
|
800 | # config value to limit the number of candidate moves to check | |
801 | maxcandidates = repo.ui.configint( |
|
801 | maxcandidates = repo.ui.configint( | |
802 | b'experimental', b'copytrace.movecandidateslimit' |
|
802 | b'experimental', b'copytrace.movecandidateslimit' | |
803 | ) |
|
803 | ) | |
804 |
|
804 | |||
805 | if len(movecandidates) > maxcandidates: |
|
805 | if len(movecandidates) > maxcandidates: | |
806 | repo.ui.status( |
|
806 | repo.ui.status( | |
807 | _( |
|
807 | _( | |
808 | b"skipping copytracing for '%s', more " |
|
808 | b"skipping copytracing for '%s', more " | |
809 | b"candidates than the limit: %d\n" |
|
809 | b"candidates than the limit: %d\n" | |
810 | ) |
|
810 | ) | |
811 | % (f, len(movecandidates)) |
|
811 | % (f, len(movecandidates)) | |
812 | ) |
|
812 | ) | |
813 | continue |
|
813 | continue | |
814 |
|
814 | |||
815 | for candidate in movecandidates: |
|
815 | for candidate in movecandidates: | |
816 | f1 = c1.filectx(candidate) |
|
816 | f1 = c1.filectx(candidate) | |
817 | if _related(f1, f2): |
|
817 | if _related(f1, f2): | |
818 | # if there are a few related copies then we'll merge |
|
818 | # if there are a few related copies then we'll merge | |
819 | # changes into all of them. This matches the behaviour |
|
819 | # changes into all of them. This matches the behaviour | |
820 | # of upstream copytracing |
|
820 | # of upstream copytracing | |
821 | copies[candidate] = f |
|
821 | copies[candidate] = f | |
822 |
|
822 | |||
823 | return copies, {}, {}, {}, {} |
|
823 | return copies, {}, {}, {}, {} | |
824 |
|
824 | |||
825 |
|
825 | |||
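To make the basename/dirname bucketing used by the heuristic above concrete, here is a minimal, self-contained sketch with invented file lists; the real code additionally applies the `experimental.copytrace.movecandidateslimit` cap and confirms each candidate with _related() before recording a copy.

    import collections
    import os

    # files present in c1 but not in base, i.e. potential rename/copy targets
    new_in_c1 = [b'src/util.py', b'docs/readme.txt']
    # files changed on the c2 side that are absent from c1's manifest
    missing = [b'lib/util.py']

    basenametofilename = collections.defaultdict(list)
    dirnametofilename = collections.defaultdict(list)
    for f in new_in_c1:
        basenametofilename[os.path.basename(f)].append(f)
        dirnametofilename[os.path.dirname(f)].append(f)

    for f in missing:
        # candidates share either the basename (moved to another directory)
        # or the dirname (renamed inside the same directory)
        candidates = (
            basenametofilename[os.path.basename(f)]
            + dirnametofilename[os.path.dirname(f)]
        )
        print(candidates)  # [b'src/util.py']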
826 | def _related(f1, f2): |
|
826 | def _related(f1, f2): | |
827 | """return True if f1 and f2 filectx have a common ancestor |
|
827 | """return True if f1 and f2 filectx have a common ancestor | |
828 |
|
828 | |||
829 | Walk back to common ancestor to see if the two files originate |
|
829 | Walk back to common ancestor to see if the two files originate | |
830 | from the same file. Since workingfilectx's rev() is None it messes |
|
830 | from the same file. Since workingfilectx's rev() is None it messes | |
831 | up the integer comparison logic, hence the pre-step check for |
|
831 | up the integer comparison logic, hence the pre-step check for | |
832 | None (f1 and f2 can only be workingfilectx's initially). |
|
832 | None (f1 and f2 can only be workingfilectx's initially). | |
833 | """ |
|
833 | """ | |
834 |
|
834 | |||
835 | if f1 == f2: |
|
835 | if f1 == f2: | |
836 | return True # a match |
|
836 | return True # a match | |
837 |
|
837 | |||
838 | g1, g2 = f1.ancestors(), f2.ancestors() |
|
838 | g1, g2 = f1.ancestors(), f2.ancestors() | |
839 | try: |
|
839 | try: | |
840 | f1r, f2r = f1.linkrev(), f2.linkrev() |
|
840 | f1r, f2r = f1.linkrev(), f2.linkrev() | |
841 |
|
841 | |||
842 | if f1r is None: |
|
842 | if f1r is None: | |
843 | f1 = next(g1) |
|
843 | f1 = next(g1) | |
844 | if f2r is None: |
|
844 | if f2r is None: | |
845 | f2 = next(g2) |
|
845 | f2 = next(g2) | |
846 |
|
846 | |||
847 | while True: |
|
847 | while True: | |
848 | f1r, f2r = f1.linkrev(), f2.linkrev() |
|
848 | f1r, f2r = f1.linkrev(), f2.linkrev() | |
849 | if f1r > f2r: |
|
849 | if f1r > f2r: | |
850 | f1 = next(g1) |
|
850 | f1 = next(g1) | |
851 | elif f2r > f1r: |
|
851 | elif f2r > f1r: | |
852 | f2 = next(g2) |
|
852 | f2 = next(g2) | |
853 | else: # f1 and f2 point to files in the same linkrev |
|
853 | else: # f1 and f2 point to files in the same linkrev | |
854 | return f1 == f2 # true if they point to the same file |
|
854 | return f1 == f2 # true if they point to the same file | |
855 | except StopIteration: |
|
855 | except StopIteration: | |
856 | return False |
|
856 | return False | |
857 |
|
857 | |||
858 |
|
858 | |||
859 | def graftcopies(wctx, ctx, base): |
|
859 | def graftcopies(wctx, ctx, base): | |
860 | """reproduce copies between base and ctx in the wctx |
|
860 | """reproduce copies between base and ctx in the wctx | |
|
861 | ||||
|
862 | Unlike mergecopies(), this function will only consider copies between base | |||
|
863 | and ctx; it will ignore copies between base and wctx. Also unlike | |||
|
864 | mergecopies(), this function will apply copies to the working copy (instead | |||
|
865 | of just returning information about the copies). That makes it cheaper | |||
|
866 | (especially in the common case of base==ctx.p1()) and useful also when | |||
|
867 | experimental.copytrace=off. | |||
|
868 | ||||
|
869 | merge.update() will have already marked most copies, but it will only | |||
|
870 | mark copies if it thinks the source files are related (see | |||
|
871 | merge._related()). It will also not mark copies if the file wasn't modified | |||
|
872 | on the local side. This function adds the copies that were "missed" | |||
|
873 | by merge.update(). | |||
|
874 | """ | |||
861 | new_copies = pathcopies(base, ctx) |
|
875 | new_copies = pathcopies(base, ctx) | |
862 | _filter(wctx.p1(), wctx, new_copies) |
|
876 | _filter(wctx.p1(), wctx, new_copies) | |
863 | for dst, src in pycompat.iteritems(new_copies): |
|
877 | for dst, src in pycompat.iteritems(new_copies): | |
864 | wctx[dst].markcopied(src) |
|
878 | wctx[dst].markcopied(src) | |
865 |
|
879 | |||
866 |
|
880 | |||
867 | def computechangesetfilesadded(ctx): |
|
881 | def computechangesetfilesadded(ctx): | |
868 | """return the list of files added in a changeset |
|
882 | """return the list of files added in a changeset | |
869 | """ |
|
883 | """ | |
870 | added = [] |
|
884 | added = [] | |
871 | for f in ctx.files(): |
|
885 | for f in ctx.files(): | |
872 | if not any(f in p for p in ctx.parents()): |
|
886 | if not any(f in p for p in ctx.parents()): | |
873 | added.append(f) |
|
887 | added.append(f) | |
874 | return added |
|
888 | return added | |
875 |
|
889 | |||
876 |
|
890 | |||
877 | def computechangesetfilesremoved(ctx): |
|
891 | def computechangesetfilesremoved(ctx): | |
878 | """return the list of files removed in a changeset |
|
892 | """return the list of files removed in a changeset | |
879 | """ |
|
893 | """ | |
880 | removed = [] |
|
894 | removed = [] | |
881 | for f in ctx.files(): |
|
895 | for f in ctx.files(): | |
882 | if f not in ctx: |
|
896 | if f not in ctx: | |
883 | removed.append(f) |
|
897 | removed.append(f) | |
884 | return removed |
|
898 | return removed | |
885 |
|
899 | |||
886 |
|
900 | |||
887 | def computechangesetcopies(ctx): |
|
901 | def computechangesetcopies(ctx): | |
888 | """return the copies data for a changeset |
|
902 | """return the copies data for a changeset | |
889 |
|
903 | |||
890 | The copies data are returned as a pair of dictionaries (p1copies, p2copies). |
|
904 | The copies data are returned as a pair of dictionaries (p1copies, p2copies). | |
891 |
|
905 | |||
892 | Each dictionary is of the form: `{newname: oldname}` |
|
906 | Each dictionary is of the form: `{newname: oldname}` | |
893 | """ |
|
907 | """ | |
894 | p1copies = {} |
|
908 | p1copies = {} | |
895 | p2copies = {} |
|
909 | p2copies = {} | |
896 | p1 = ctx.p1() |
|
910 | p1 = ctx.p1() | |
897 | p2 = ctx.p2() |
|
911 | p2 = ctx.p2() | |
898 | narrowmatch = ctx._repo.narrowmatch() |
|
912 | narrowmatch = ctx._repo.narrowmatch() | |
899 | for dst in ctx.files(): |
|
913 | for dst in ctx.files(): | |
900 | if not narrowmatch(dst) or dst not in ctx: |
|
914 | if not narrowmatch(dst) or dst not in ctx: | |
901 | continue |
|
915 | continue | |
902 | copied = ctx[dst].renamed() |
|
916 | copied = ctx[dst].renamed() | |
903 | if not copied: |
|
917 | if not copied: | |
904 | continue |
|
918 | continue | |
905 | src, srcnode = copied |
|
919 | src, srcnode = copied | |
906 | if src in p1 and p1[src].filenode() == srcnode: |
|
920 | if src in p1 and p1[src].filenode() == srcnode: | |
907 | p1copies[dst] = src |
|
921 | p1copies[dst] = src | |
908 | elif src in p2 and p2[src].filenode() == srcnode: |
|
922 | elif src in p2 and p2[src].filenode() == srcnode: | |
909 | p2copies[dst] = src |
|
923 | p2copies[dst] = src | |
910 | return p1copies, p2copies |
|
924 | return p1copies, p2copies | |
911 |
|
925 | |||
912 |
|
926 | |||
913 | def encodecopies(files, copies): |
|
927 | def encodecopies(files, copies): | |
914 | items = [] |
|
928 | items = [] | |
915 | for i, dst in enumerate(files): |
|
929 | for i, dst in enumerate(files): | |
916 | if dst in copies: |
|
930 | if dst in copies: | |
917 | items.append(b'%d\0%s' % (i, copies[dst])) |
|
931 | items.append(b'%d\0%s' % (i, copies[dst])) | |
918 | if len(items) != len(copies): |
|
932 | if len(items) != len(copies): | |
919 | raise error.ProgrammingError( |
|
933 | raise error.ProgrammingError( | |
920 | b'some copy targets missing from file list' |
|
934 | b'some copy targets missing from file list' | |
921 | ) |
|
935 | ) | |
922 | return b"\n".join(items) |
|
936 | return b"\n".join(items) | |
923 |
|
937 | |||
924 |
|
938 | |||
925 | def decodecopies(files, data): |
|
939 | def decodecopies(files, data): | |
926 | try: |
|
940 | try: | |
927 | copies = {} |
|
941 | copies = {} | |
928 | if not data: |
|
942 | if not data: | |
929 | return copies |
|
943 | return copies | |
930 | for l in data.split(b'\n'): |
|
944 | for l in data.split(b'\n'): | |
931 | strindex, src = l.split(b'\0') |
|
945 | strindex, src = l.split(b'\0') | |
932 | i = int(strindex) |
|
946 | i = int(strindex) | |
933 | dst = files[i] |
|
947 | dst = files[i] | |
934 | copies[dst] = src |
|
948 | copies[dst] = src | |
935 | return copies |
|
949 | return copies | |
936 | except (ValueError, IndexError): |
|
950 | except (ValueError, IndexError): | |
937 | # Perhaps someone had chosen the same key name (e.g. "p1copies") and |
|
951 | # Perhaps someone had chosen the same key name (e.g. "p1copies") and | |
938 | # used different syntax for the value. |
|
952 | # used different syntax for the value. | |
939 | return None |
|
953 | return None | |
940 |
|
954 | |||
941 |
|
955 | |||
942 | def encodefileindices(files, subset): |
|
956 | def encodefileindices(files, subset): | |
943 | subset = set(subset) |
|
957 | subset = set(subset) | |
944 | indices = [] |
|
958 | indices = [] | |
945 | for i, f in enumerate(files): |
|
959 | for i, f in enumerate(files): | |
946 | if f in subset: |
|
960 | if f in subset: | |
947 | indices.append(b'%d' % i) |
|
961 | indices.append(b'%d' % i) | |
948 | return b'\n'.join(indices) |
|
962 | return b'\n'.join(indices) | |
949 |
|
963 | |||
950 |
|
964 | |||
951 | def decodefileindices(files, data): |
|
965 | def decodefileindices(files, data): | |
952 | try: |
|
966 | try: | |
953 | subset = [] |
|
967 | subset = [] | |
954 | if not data: |
|
968 | if not data: | |
955 | return subset |
|
969 | return subset | |
956 | for strindex in data.split(b'\n'): |
|
970 | for strindex in data.split(b'\n'): | |
957 | i = int(strindex) |
|
971 | i = int(strindex) | |
958 | if i < 0 or i >= len(files): |
|
972 | if i < 0 or i >= len(files): | |
959 | return None |
|
973 | return None | |
960 | subset.append(files[i]) |
|
974 | subset.append(files[i]) | |
961 | return subset |
|
975 | return subset | |
962 | except (ValueError, IndexError): |
|
976 | except (ValueError, IndexError): | |
963 | # Perhaps someone had chosen the same key name (e.g. "added") and |
|
977 | # Perhaps someone had chosen the same key name (e.g. "added") and | |
964 | # used different syntax for the value. |
|
978 | # used different syntax for the value. | |
965 | return None |
|
979 | return None | |
966 |
|
980 | |||
967 |
|
981 | |||
968 | def _getsidedata(srcrepo, rev): |
|
982 | def _getsidedata(srcrepo, rev): | |
969 | ctx = srcrepo[rev] |
|
983 | ctx = srcrepo[rev] | |
970 | filescopies = computechangesetcopies(ctx) |
|
984 | filescopies = computechangesetcopies(ctx) | |
971 | filesadded = computechangesetfilesadded(ctx) |
|
985 | filesadded = computechangesetfilesadded(ctx) | |
972 | filesremoved = computechangesetfilesremoved(ctx) |
|
986 | filesremoved = computechangesetfilesremoved(ctx) | |
973 | sidedata = {} |
|
987 | sidedata = {} | |
974 | if any([filescopies, filesadded, filesremoved]): |
|
988 | if any([filescopies, filesadded, filesremoved]): | |
975 | sortedfiles = sorted(ctx.files()) |
|
989 | sortedfiles = sorted(ctx.files()) | |
976 | p1copies, p2copies = filescopies |
|
990 | p1copies, p2copies = filescopies | |
977 | p1copies = encodecopies(sortedfiles, p1copies) |
|
991 | p1copies = encodecopies(sortedfiles, p1copies) | |
978 | p2copies = encodecopies(sortedfiles, p2copies) |
|
992 | p2copies = encodecopies(sortedfiles, p2copies) | |
979 | filesadded = encodefileindices(sortedfiles, filesadded) |
|
993 | filesadded = encodefileindices(sortedfiles, filesadded) | |
980 | filesremoved = encodefileindices(sortedfiles, filesremoved) |
|
994 | filesremoved = encodefileindices(sortedfiles, filesremoved) | |
981 | if p1copies: |
|
995 | if p1copies: | |
982 | sidedata[sidedatamod.SD_P1COPIES] = p1copies |
|
996 | sidedata[sidedatamod.SD_P1COPIES] = p1copies | |
983 | if p2copies: |
|
997 | if p2copies: | |
984 | sidedata[sidedatamod.SD_P2COPIES] = p2copies |
|
998 | sidedata[sidedatamod.SD_P2COPIES] = p2copies | |
985 | if filesadded: |
|
999 | if filesadded: | |
986 | sidedata[sidedatamod.SD_FILESADDED] = filesadded |
|
1000 | sidedata[sidedatamod.SD_FILESADDED] = filesadded | |
987 | if filesremoved: |
|
1001 | if filesremoved: | |
988 | sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved |
|
1002 | sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved | |
989 | return sidedata |
|
1003 | return sidedata | |
990 |
|
1004 | |||
991 |
|
1005 | |||
992 | def getsidedataadder(srcrepo, destrepo): |
|
1006 | def getsidedataadder(srcrepo, destrepo): | |
993 | use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade') |
|
1007 | use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade') | |
994 | if pycompat.iswindows or not use_w: |
|
1008 | if pycompat.iswindows or not use_w: | |
995 | return _get_simple_sidedata_adder(srcrepo, destrepo) |
|
1009 | return _get_simple_sidedata_adder(srcrepo, destrepo) | |
996 | else: |
|
1010 | else: | |
997 | return _get_worker_sidedata_adder(srcrepo, destrepo) |
|
1011 | return _get_worker_sidedata_adder(srcrepo, destrepo) | |
998 |
|
1012 | |||
999 |
|
1013 | |||
1000 | def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens): |
|
1014 | def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens): | |
1001 | """The function used by worker precomputing sidedata |
|
1015 | """The function used by worker precomputing sidedata | |
1002 |
|
1016 | |||
1003 | It reads from an input queue containing revision numbers. |
|
1017 | It reads from an input queue containing revision numbers. | |
1004 | It writes to an output queue containing (rev, <sidedata-map>) pairs. |
|
1018 | It writes to an output queue containing (rev, <sidedata-map>) pairs. | |
1005 |
|
1019 | |||
1006 | The `None` input value is used as a stop signal. |
|
1020 | The `None` input value is used as a stop signal. | |
1007 |
|
1021 | |||
1008 | The `tokens` semaphore is used to avoid having too many unprocessed |
|
1022 | The `tokens` semaphore is used to avoid having too many unprocessed | |
1009 | entries. The workers need to acquire one token before fetching a task. |
|
1023 | entries. The workers need to acquire one token before fetching a task. | |
1010 | They will be released by the consumer of the produced data. |
|
1024 | They will be released by the consumer of the produced data. | |
1011 | """ |
|
1025 | """ | |
1012 | tokens.acquire() |
|
1026 | tokens.acquire() | |
1013 | rev = revs_queue.get() |
|
1027 | rev = revs_queue.get() | |
1014 | while rev is not None: |
|
1028 | while rev is not None: | |
1015 | data = _getsidedata(srcrepo, rev) |
|
1029 | data = _getsidedata(srcrepo, rev) | |
1016 | sidedata_queue.put((rev, data)) |
|
1030 | sidedata_queue.put((rev, data)) | |
1017 | tokens.acquire() |
|
1031 | tokens.acquire() | |
1018 | rev = revs_queue.get() |
|
1032 | rev = revs_queue.get() | |
1019 | # processing of `None` is completed, release the token. |
|
1033 | # processing of `None` is completed, release the token. | |
1020 | tokens.release() |
|
1034 | tokens.release() | |
1021 |
|
1035 | |||
1022 |
|
1036 | |||
1023 | BUFF_PER_WORKER = 50 |
|
1037 | BUFF_PER_WORKER = 50 | |
1024 |
|
1038 | |||
1025 |
|
1039 | |||
1026 | def _get_worker_sidedata_adder(srcrepo, destrepo): |
|
1040 | def _get_worker_sidedata_adder(srcrepo, destrepo): | |
1027 | """The parallel version of the sidedata computation |
|
1041 | """The parallel version of the sidedata computation | |
1028 |
|
1042 | |||
1029 | This code spawns a pool of workers that precompute a buffer of sidedata |
|
1043 | This code spawns a pool of workers that precompute a buffer of sidedata | |
1030 | before we actually need it""" |
|
1044 | before we actually need it""" | |
1031 | # avoid circular import copies -> scmutil -> worker -> copies |
|
1045 | # avoid circular import copies -> scmutil -> worker -> copies | |
1032 | from . import worker |
|
1046 | from . import worker | |
1033 |
|
1047 | |||
1034 | nbworkers = worker._numworkers(srcrepo.ui) |
|
1048 | nbworkers = worker._numworkers(srcrepo.ui) | |
1035 |
|
1049 | |||
1036 | tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER) |
|
1050 | tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER) | |
1037 | revsq = multiprocessing.Queue() |
|
1051 | revsq = multiprocessing.Queue() | |
1038 | sidedataq = multiprocessing.Queue() |
|
1052 | sidedataq = multiprocessing.Queue() | |
1039 |
|
1053 | |||
1040 | assert srcrepo.filtername is None |
|
1054 | assert srcrepo.filtername is None | |
1041 | # queue all tasks beforehand; revision numbers are small and it makes |
|
1055 | # queue all tasks beforehand; revision numbers are small and it makes | |
1042 | # synchronisation simpler |
|
1056 | # synchronisation simpler | |
1043 | # |
|
1057 | # | |
1044 | # Since the computation for each node can be quite expensive, the overhead |
|
1058 | # Since the computation for each node can be quite expensive, the overhead | |
1045 | # of using a single queue is not relevant. In practice, most computations |
|
1059 | # of using a single queue is not relevant. In practice, most computations | |
1046 | # are fast but some are very expensive and dominate all the other, smaller |
|
1060 | # are fast but some are very expensive and dominate all the other, smaller | |
1047 | # costs. |
|
1061 | # costs. | |
1048 | for r in srcrepo.changelog.revs(): |
|
1062 | for r in srcrepo.changelog.revs(): | |
1049 | revsq.put(r) |
|
1063 | revsq.put(r) | |
1050 | # queue the "no more tasks" markers |
|
1064 | # queue the "no more tasks" markers | |
1051 | for i in range(nbworkers): |
|
1065 | for i in range(nbworkers): | |
1052 | revsq.put(None) |
|
1066 | revsq.put(None) | |
1053 |
|
1067 | |||
1054 | allworkers = [] |
|
1068 | allworkers = [] | |
1055 | for i in range(nbworkers): |
|
1069 | for i in range(nbworkers): | |
1056 | args = (srcrepo, revsq, sidedataq, tokens) |
|
1070 | args = (srcrepo, revsq, sidedataq, tokens) | |
1057 | w = multiprocessing.Process(target=_sidedata_worker, args=args) |
|
1071 | w = multiprocessing.Process(target=_sidedata_worker, args=args) | |
1058 | allworkers.append(w) |
|
1072 | allworkers.append(w) | |
1059 | w.start() |
|
1073 | w.start() | |
1060 |
|
1074 | |||
1061 | # dictionary to store results for revisions higher than the one we are |
|
1075 | # dictionary to store results for revisions higher than the one we are | |
1062 | # looking for. For example, if we need the sidedatamap for 42, and 43 is |
|
1076 | # looking for. For example, if we need the sidedatamap for 42, and 43 is | |
1063 | # received, we shelve 43 for later use. |
|
1077 | # received, we shelve 43 for later use. | |
1064 | staging = {} |
|
1078 | staging = {} | |
1065 |
|
1079 | |||
1066 | def sidedata_companion(revlog, rev): |
|
1080 | def sidedata_companion(revlog, rev): | |
1067 | sidedata = {} |
|
1081 | sidedata = {} | |
1068 | if util.safehasattr(revlog, b'filteredrevs'): # this is a changelog |
|
1082 | if util.safehasattr(revlog, b'filteredrevs'): # this is a changelog | |
1069 | # Is the data previously shelved? |
|
1083 | # Is the data previously shelved? | |
1070 | sidedata = staging.pop(rev, None) |
|
1084 | sidedata = staging.pop(rev, None) | |
1071 | if sidedata is None: |
|
1085 | if sidedata is None: | |
1072 | # look at the queued results until we find the one we are looking |
|
1086 | # look at the queued results until we find the one we are looking | |
1073 | # for (shelve the other ones) |
|
1087 | # for (shelve the other ones) | |
1074 | r, sidedata = sidedataq.get() |
|
1088 | r, sidedata = sidedataq.get() | |
1075 | while r != rev: |
|
1089 | while r != rev: | |
1076 | staging[r] = sidedata |
|
1090 | staging[r] = sidedata | |
1077 | r, sidedata = sidedataq.get() |
|
1091 | r, sidedata = sidedataq.get() | |
1078 | tokens.release() |
|
1092 | tokens.release() | |
1079 | return False, (), sidedata |
|
1093 | return False, (), sidedata | |
1080 |
|
1094 | |||
1081 | return sidedata_companion |
|
1095 | return sidedata_companion | |
1082 |
|
1096 | |||
1083 |
|
1097 | |||
1084 | def _get_simple_sidedata_adder(srcrepo, destrepo): |
|
1098 | def _get_simple_sidedata_adder(srcrepo, destrepo): | |
1085 | """The simple version of the sidedata computation |
|
1099 | """The simple version of the sidedata computation | |
1086 |
|
1100 | |||
1087 | It just computes it in the same thread, on request""" |
|
1101 | It just computes it in the same thread, on request""" | |
1088 |
|
1102 | |||
1089 | def sidedatacompanion(revlog, rev): |
|
1103 | def sidedatacompanion(revlog, rev): | |
1090 | sidedata = {} |
|
1104 | sidedata = {} | |
1091 | if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog |
|
1105 | if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog | |
1092 | sidedata = _getsidedata(srcrepo, rev) |
|
1106 | sidedata = _getsidedata(srcrepo, rev) | |
1093 | return False, (), sidedata |
|
1107 | return False, (), sidedata | |
1094 |
|
1108 | |||
1095 | return sidedatacompanion |
|
1109 | return sidedatacompanion | |
1096 |
|
1110 | |||
1097 |
|
1111 | |||
1098 | def getsidedataremover(srcrepo, destrepo): |
|
1112 | def getsidedataremover(srcrepo, destrepo): | |
1099 | def sidedatacompanion(revlog, rev): |
|
1113 | def sidedatacompanion(revlog, rev): | |
1100 | f = () |
|
1114 | f = () | |
1101 | if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog |
|
1115 | if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog | |
1102 | if revlog.flags(rev) & REVIDX_SIDEDATA: |
|
1116 | if revlog.flags(rev) & REVIDX_SIDEDATA: | |
1103 | f = ( |
|
1117 | f = ( | |
1104 | sidedatamod.SD_P1COPIES, |
|
1118 | sidedatamod.SD_P1COPIES, | |
1105 | sidedatamod.SD_P2COPIES, |
|
1119 | sidedatamod.SD_P2COPIES, | |
1106 | sidedatamod.SD_FILESADDED, |
|
1120 | sidedatamod.SD_FILESADDED, | |
1107 | sidedatamod.SD_FILESREMOVED, |
|
1121 | sidedatamod.SD_FILESREMOVED, | |
1108 | ) |
|
1122 | ) | |
1109 | return False, f, {} |
|
1123 | return False, f, {} | |
1110 |
|
1124 | |||
1111 | return sidedatacompanion |
|
1125 | return sidedatacompanion |