copies: extract function for finding directory renames...
Martin von Zweigbergk
r44624:45192589 default
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import multiprocessing
import os

from .i18n import _


from .revlogutils.flagutil import REVIDX_SIDEDATA

from . import (
    error,
    match as matchmod,
    node,
    pathutil,
    pycompat,
    util,
)

from .revlogutils import sidedata as sidedatamod

from .utils import stringutil


def _filter(src, dst, t):
    """filters out invalid copies after chaining"""

    # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
    # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
    # in the following table (not including trivial cases). For example, case 2
    # is where a file existed in 'src' and remained under that name in 'mid' and
    # then was renamed between 'mid' and 'dst'.
    #
    # case  src  mid  dst  result
    #   1    x    y    -     -
    #   2    x    y    y    x->y
    #   3    x    y    x     -
    #   4    x    y    z    x->z
    #   5    -    x    y     -
    #   6    x    x    y    x->y
    #
    # _chain() takes care of chaining the copies in 'a' and 'b', but it
    # cannot tell the difference between cases 1 and 2, between 3 and 4, or
    # between 5 and 6, so it includes all cases in its result.
    # Cases 1, 3, and 5 are then removed by _filter().

    for k, v in list(t.items()):
        # remove copies from files that didn't exist
        if v not in src:
            del t[k]
        # remove criss-crossed copies
        elif k in src and v in dst:
            del t[k]
        # remove copies to files that were then removed
        elif k not in dst:
            del t[k]
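
# A minimal illustration of _filter() (editor's sketch, not part of the
# original module). _filter() only needs membership tests on 'src' and
# 'dst', so plain sets of file names can stand in for contexts here:
#
#     src = {b'a', b'x'}
#     dst = {b'a', b'b', b'x'}
#     t = {b'b': b'a', b'a': b'x', b'c': b'a'}
#     _filter(src, dst, t)
#     # b'a': b'x' is criss-crossed (b'a' in src, b'x' in dst) and
#     # b'c': b'a' points to a destination missing from dst, so both
#     # entries are removed, leaving t == {b'b': b'a'}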


def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    result = prefix.copy()
    for key, value in pycompat.iteritems(suffix):
        result[key] = prefix.get(value, value)
    return result
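
# A quick sketch of what _chain() computes (editor's example, not in the
# original file). Both arguments map {destination: source}:
#
#     prefix = {b'b': b'a'}    # a was copied to b between src and mid
#     suffix = {b'c': b'b'}    # b was copied to c between mid and dst
#     assert _chain(prefix, suffix) == {b'b': b'a', b'c': b'a'}
#
# The chained mapping may still contain entries that are invalid for the
# overall src -> dst comparison; those are cleaned up by _filter() above.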


def _tracefile(fctx, am, basemf):
    """return the path of the ancestor of fctx that is present in ancestor
    manifest am

    Note: we used to try and stop after a given limit, however checking if that
    limit is reached turned out to be very expensive. We are better off
    disabling that feature."""

    for f in fctx.ancestors():
        path = f.path()
        if am.get(path, None) == f.filenode():
            return path
        if basemf and basemf.get(path, None) == f.filenode():
            return path


def _dirstatecopies(repo, match=None):
    ds = repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in b'anm' or (match and not match(k)):
            del c[k]
    return c


def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)


def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource
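
# The option consulted above is set in hgrc (editor's note; the section
# and option names are taken verbatim from the config lookup in this
# function):
#
#     [experimental]
#     copies.read-from = compatibility   # or changeset-only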


def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm


def _revinfogetter(repo):
    """return a function that returns multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    """
    cl = repo.changelog
    parents = cl.parentrevs

    if repo.filecopiesmode == b'changeset-sidedata':
        changelogrevision = cl.changelogrevision
        flags = cl.flags

        # A small cache to avoid doing the work twice for merges
        #
        # In the vast majority of cases, if we ask information for a revision
        # about 1 parent, we'll later ask it for the other. So it makes sense
        # to keep the information around when reaching the first parent of a
        # merge and dropping it after it was provided for the second parent.
        #
        # There exist cases where only one parent of the merge will be walked.
        # It happens when the "destination" of the copy tracing is a
        # descendant of a new root, not common with the "source". In that
        # case, we will only walk through merge parents that are descendants
        # of changesets common between "source" and "destination".
        #
        # With the current implementation, if such changesets have copy
        # information, we'll keep them in memory until the end of
        # _changesetforwardcopies. We don't expect the case to be frequent
        # enough to matter.
        #
        # In addition, it would be possible to reach a pathological case, where
        # many first parents are met before any second parent is reached. In
        # that case the cache could grow. If this ever becomes an issue one can
        # safely introduce a maximum cache size. This would trade extra CPU/IO
        # time to save memory.
        merge_caches = {}

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & REVIDX_SIDEDATA:
                e = merge_caches.pop(rev, None)
                if e is not None:
                    return e
                c = changelogrevision(rev)
                p1copies = c.p1copies
                p2copies = c.p2copies
                removed = c.filesremoved
                if p1 != node.nullrev and p2 != node.nullrev:
                    # XXX in some cases we over-cache, IGNORE
                    merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
            else:
                p1copies = {}
                p2copies = {}
                removed = []
            return p1, p2, p1copies, p2copies, removed

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            ctx = repo[rev]
            p1copies, p2copies = ctx._copies
            removed = ctx.filesremoved()
            return p1, p2, p1copies, p2copies, removed

    return revinfo
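
# Shape of the values produced by the returned closure (editor's note;
# 'rev' stands for any revision number valid for the repo):
#
#     revinfo = _revinfogetter(repo)
#     p1, p2, p1copies, p2copies, removed = revinfo(rev)
#     # p1/p2 are parent revision numbers (node.nullrev when absent),
#     # p1copies/p2copies map {newname: oldname}, and removed lists the
#     # files deleted in 'rev'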


def _changesetforwardcopies(a, b, match):
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)
    return _combinechangesetcopies(revs, children, b.rev(), revinfo, match)


def _combinechangesetcopies(revs, children, targetrev, revinfo, match):
    """combine the copies information for each item of `revs`

    revs: sorted iterable of revisions to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in `revs`)
    revinfo(rev): a function that returns (p1, p2, p1copies, p2copies, removed)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
                # _chain makes a copy, which we could avoid in some
                # simple/linear cases.
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 takes precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regard to
                # potential filelog related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    return all_copies[targetrev]
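
# A tiny walkthrough of the merge rule above (editor's sketch, not part
# of the original file). If both parents of a merge changeset 'c'
# propagated copy information for the same destination:
#
#     via p1: {b'dst': b'src1'}
#     via p2: {b'dst': b'src2'}
#
# the combined result keeps {b'dst': b'src1'}, because parent 1 wins on
# conflict regardless of which parent happens to be processed first.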


def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies


def _backwardrenames(a, b, match):
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
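
# What the reversal above yields (editor's example). Suppose that going
# forward from b to a the file b'old' became b'new', so
# _forwardcopies(b, a) == {b'new': b'old'}. If b'old' is gone from 'a'
# (a true rename, not a copy), _backwardrenames(a, b, ...) returns
# {b'old': b'new'}; if b'old' still exists in 'a', the entry is skipped,
# since reversing a copy would be ambiguous.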


def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
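
# Typical entry-point usage (editor's sketch; 'repo', 'rev1' and 'rev2'
# are hypothetical placeholders for a repository object and two
# revisions):
#
#     from mercurial import copies as copiesmod
#     renames = copiesmod.pathcopies(repo[rev1], repo[rev2])
#     # -> {destination-name: source-name}, computed forward, backward,
#     # or via the common ancestor as the debug output above indicates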


def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    | o        ---> 3 commit that modifies a.txt
    | /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on in such
        # cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)


def _isfullcopytraceable(repo, c1, base):
    """ Checks if base, source and destination are all non-public branches;
    if yes, use the full copytrace algorithm for increased capabilities, since
    it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    the number of changesets from c1 to base such that if the number of
    changesets is more than the limit, the full copytracing algorithm won't be
    used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint(
            b'experimental', b'copytrace.sourcecommitlimit'
        )
        commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False
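
# The limit consulted above comes from hgrc (editor's note; the option
# name is taken from the configint() lookup, and the value shown is
# only illustrative):
#
#     [experimental]
#     copytrace.sourcecommitlimit = 100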


def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif m2[src] != mb[src]:
        if not _related(c2[src], base[src]):
            return
        # modified on side 2
        for dst in dsts1:
            if dst not in m2:
                # dst not added on side 2 (handle as regular
                # "both created" case in manifestmerge otherwise)
                copy[dst] = src


def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and, for each file, checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return {}, {}, {}, {}, {}

    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a', then 'c' is treated as a copy of
                # 'a' and the rename is not marked divergent.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)

    if repo.ui.debugflag:
        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b"   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    dirmove, movewithdir = _dir_renames(repo, c1, c2, copy, fullcopy, u1, u2)

    return copy, movewithdir, diverge, renamedelete, dirmove


def _dir_renames(repo, c1, c2, copy, fullcopy, u1, u2):
    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return {}, {}

    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return dirmove, movewithdir
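
# A concrete shape for the two mappings returned above (editor's
# illustration, not from the original file). If every file of b'a/' was
# moved to b'b/':
#
#     fullcopy == {b'b/x': b'a/x', b'b/y': b'a/y'}
#     # -> dirmove == {b'a/': b'b/'}
#
# and an unmatched new file b'a/z' (in u1 or u2) then yields
#
#     movewithdir == {b'a/z': b'b/z'}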


def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of the following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    The source branch is the commits from base up to c2, not including base.

    If a merge is involved it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
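
# How the candidate sets above pair files up (editor's sketch). If
# b'dir/file.c' is missing from c1 but present in base and c2, the
# heuristic considers as move candidates every file new in c1 that
# shares its basename (e.g. b'newdir/file.c') or its directory (e.g.
# b'dir/other.c'), then keeps those whose history _related() accepts:
#
#     basenametofilename[b'file.c'] == [b'newdir/file.c']
#     dirnametofilename[b'dir'] == [b'dir/other.c']
#     movecandidates == [b'newdir/file.c', b'dir/other.c']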


def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True  # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else:  # f1 and f2 point to files in the same linkrev
                return f1 == f2  # true if they point to the same file
    except StopIteration:
        return False
859
865
860
866
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    new_copies = pathcopies(base, ctx)
    _filter(wctx.p1(), wctx, new_copies)
    for dst, src in pycompat.iteritems(new_copies):
        wctx[dst].markcopied(src)


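# Hypothetical usage sketch (names assumed, not taken from the original
# module): during a graft of `ctx` onto the working copy, a caller would do
# something along these lines to re-mark the renames merge.update() missed:
#
#   wctx = repo[None]
#   graftcopies(wctx, ctx, ctx.p1())  # base == ctx.p1() is the cheap case

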
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset
    """
    added = []
    for f in ctx.files():
        if not any(f in p for p in ctx.parents()):
            added.append(f)
    return added


def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset
    """
    removed = []
    for f in ctx.files():
        if f not in ctx:
            removed.append(f)
    return removed


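# Worked example (illustrative, hypothetical file names): for a changeset that
# adds `new.txt` and removes `old.txt`, ctx.files() lists both. `new.txt`
# appears in no parent, so computechangesetfilesadded() returns [b'new.txt'];
# `old.txt` is no longer in ctx itself, so computechangesetfilesremoved()
# returns [b'old.txt'].

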
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries (p1copies, p2copies).

    Each dictionary is of the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies


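# Shape of the result (illustrative, hypothetical names): if `dst.txt` was
# copied from `src.txt` relative to the first parent and nothing was copied
# relative to the second parent, the call would return:
#
#   p1copies, p2copies = computechangesetcopies(ctx)
#   # p1copies == {b'dst.txt': b'src.txt'}
#   # p2copies == {}

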
def encodecopies(files, copies):
    items = []
    for i, dst in enumerate(files):
        if dst in copies:
            items.append(b'%d\0%s' % (i, copies[dst]))
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)


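# Worked example (illustrative values): each copy is recorded as
# "<index-in-files>\0<source>" and the entries are joined with newlines:
#
#   files = [b'a.txt', b'b.txt', b'c.txt']
#   copies = {b'b.txt': b'a.txt', b'c.txt': b'a.txt'}
#   encodecopies(files, copies) == b'1\x00a.txt\n2\x00a.txt'

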
def decodecopies(files, data):
    try:
        copies = {}
        if not data:
            return copies
        for l in data.split(b'\n'):
            strindex, src = l.split(b'\0')
            i = int(strindex)
            dst = files[i]
            copies[dst] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None


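# Round trip with the encodecopies() example above: decoding restores the
# mapping as long as `files` is the same sorted list used for encoding, and
# any parse failure yields None instead of raising:
#
#   decodecopies([b'a.txt', b'b.txt', b'c.txt'], b'1\x00a.txt\n2\x00a.txt')
#   # == {b'b.txt': b'a.txt', b'c.txt': b'a.txt'}

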
def encodefileindices(files, subset):
    subset = set(subset)
    indices = []
    for i, f in enumerate(files):
        if f in subset:
            indices.append(b'%d' % i)
    return b'\n'.join(indices)


def decodefileindices(files, data):
    try:
        subset = []
        if not data:
            return subset
        for strindex in data.split(b'\n'):
            i = int(strindex)
            if i < 0 or i >= len(files):
                return None
            subset.append(files[i])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None


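# Worked example (illustrative values): a subset is stored as newline-separated
# indices into the file list, so the two helpers round-trip:
#
#   files = [b'a.txt', b'b.txt', b'c.txt']
#   encodefileindices(files, [b'a.txt', b'c.txt']) == b'0\n2'
#   decodefileindices(files, b'0\n2') == [b'a.txt', b'c.txt']

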
def _getsidedata(srcrepo, rev):
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    if any([filescopies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies, p2copies = filescopies
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata


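# Illustrative result (hypothetical changeset): for a revision that renamed
# `a.txt` to `b.txt` relative to p1, sortedfiles is [b'a.txt', b'b.txt'] and
# the returned map holds only the non-empty encoded entries:
#
#   _getsidedata(srcrepo, rev)
#   # == {sidedatamod.SD_P1COPIES: b'1\x00a.txt',  # b.txt copied from a.txt
#   #     sidedatamod.SD_FILESADDED: b'1',         # b.txt was added
#   #     sidedatamod.SD_FILESREMOVED: b'0'}       # a.txt was removed

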
def getsidedataadder(srcrepo, destrepo):
    use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
    if pycompat.iswindows or not use_w:
        return _get_simple_sidedata_adder(srcrepo, destrepo)
    else:
        return _get_worker_sidedata_adder(srcrepo, destrepo)


def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
    """The function used by a worker to precompute sidedata

    It reads revision numbers from an input queue and writes
    (rev, <sidedata-map>) pairs to an output queue.

    The `None` input value is used as a stop signal.

    The `tokens` semaphore is used to avoid having too many unprocessed
    entries. Each worker needs to acquire one token before fetching a task;
    the tokens are released by the consumer of the produced data.
    """
    tokens.acquire()
    rev = revs_queue.get()
    while rev is not None:
        data = _getsidedata(srcrepo, rev)
        sidedata_queue.put((rev, data))
        tokens.acquire()
        rev = revs_queue.get()
    # processing of `None` is completed, release the token.
    tokens.release()


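# Protocol sketch (illustrative, not part of the original module): the
# semaphore bounds how many computed-but-unconsumed results may exist at any
# time. A minimal standalone version of the same back-pressure idea, with a
# hypothetical `compute` function:
#
#   def producer(tasks, results, tokens, compute):
#       tokens.acquire()  # wait for room in the buffer
#       task = tasks.get()
#       while task is not None:
#           results.put((task, compute(task)))
#           tokens.acquire()
#           task = tasks.get()
#       tokens.release()  # give back the token taken for `None`
#
#   # The consumer calls tokens.release() once per result it takes off
#   # `results`, capping the buffer at the semaphore's initial value.

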
BUFF_PER_WORKER = 50


def _get_worker_sidedata_adder(srcrepo, destrepo):
    """The parallel version of the sidedata computation

    This code spawns a pool of workers that precompute a buffer of sidedata
    before we actually need them"""
    # avoid circular import copies -> scmutil -> worker -> copies
    from . import worker

    nbworkers = worker._numworkers(srcrepo.ui)

    tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
    revsq = multiprocessing.Queue()
    sidedataq = multiprocessing.Queue()

    assert srcrepo.filtername is None
    # queue all tasks beforehand; revision numbers are small and it makes
    # synchronisation simpler
    #
    # Since the computation for each node can be quite expensive, the overhead
    # of using a single queue is not relevant. In practice, most computations
    # are fast but some are very expensive and dominate all the other smaller
    # costs.
    for r in srcrepo.changelog.revs():
        revsq.put(r)
    # queue the "no more tasks" markers
    for i in range(nbworkers):
        revsq.put(None)

    allworkers = []
    for i in range(nbworkers):
        args = (srcrepo, revsq, sidedataq, tokens)
        w = multiprocessing.Process(target=_sidedata_worker, args=args)
        allworkers.append(w)
        w.start()

    # dictionary to store results for revisions higher than the one we are
    # looking for. For example, if we need the sidedata map for 42 and 43 is
    # received, we shelve 43 for later use.
    staging = {}

    def sidedata_companion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            # Was the data previously shelved?
            sidedata = staging.pop(rev, None)
            if sidedata is None:
                # look at the queued results until we find the one we are
                # looking for (shelving the other ones)
                r, sidedata = sidedataq.get()
                while r != rev:
                    staging[r] = sidedata
                    r, sidedata = sidedataq.get()
            tokens.release()
        return False, (), sidedata

    return sidedata_companion


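# Reordering sketch (illustrative, standalone): the staging dict above turns
# an out-of-order stream of (rev, data) pairs back into the order the caller
# asks for. The same logic with a plain iterator standing in for the queue:
#
#   results = iter([(2, b'two'), (1, b'one'), (3, b'three')])
#   staging = {}
#
#   def get_for(rev):
#       data = staging.pop(rev, None)
#       if data is None:
#           r, data = next(results)
#           while r != rev:
#               staging[r] = data
#               r, data = next(results)
#       return data
#
#   get_for(1)  # -> b'one' (rev 2 gets shelved on the way)
#   get_for(2)  # -> b'two' (served from staging)

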
def _get_simple_sidedata_adder(srcrepo, destrepo):
    """The simple version of the sidedata computation

    It just computes it in the same thread on request"""

    def sidedatacompanion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion


def getsidedataremover(srcrepo, destrepo):
    def sidedatacompanion(revlog, rev):
        f = ()
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                f = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, f, {}

    return sidedatacompanion