flags: account for flag change when tracking rename relevant to merge...
marmoute
r45396:d452acc8 stable
@@ -1,1171 +1,1171 @@
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import multiprocessing
import os

from .i18n import _


from .revlogutils.flagutil import REVIDX_SIDEDATA

from . import (
    error,
    match as matchmod,
    node,
    pathutil,
    pycompat,
    util,
)

from .revlogutils import sidedata as sidedatamod

from .utils import stringutil


def _filter(src, dst, t):
    """filters out invalid copies after chaining"""

    # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
    # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
    # in the following table (not including trivial cases). For example, case 2
    # is where a file existed in 'src' and remained under that name in 'mid' and
    # then was renamed between 'mid' and 'dst'.
    #
    # case src mid dst result
    #   1   x   y   -    -
    #   2   x   y   y   x->y
    #   3   x   y   x    -
    #   4   x   y   z   x->z
    #   5   -   x   y    -
    #   6   x   x   y   x->y
    #
    # _chain() takes care of chaining the copies in 'a' and 'b', but it
    # cannot tell the difference between cases 1 and 2, between 3 and 4, or
    # between 5 and 6, so it includes all cases in its result.
    # Cases 1, 3, and 5 are then removed by _filter().

    for k, v in list(t.items()):
        # remove copies from files that didn't exist
        if v not in src:
            del t[k]
        # remove criss-crossed copies
        elif k in src and v in dst:
            del t[k]
        # remove copies to files that were then removed
        elif k not in dst:
            del t[k]

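# Illustrative example for _filter (not part of the original file): with
# src = {'x'}, dst = {'y', 'z'} and t = {'y': 'x', 'z': 'w'}, the entry
# 'z' -> 'w' is dropped because 'w' did not exist in src, leaving
# t = {'y': 'x'} (case 2 in the table above).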

def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    result = prefix.copy()
    for key, value in pycompat.iteritems(suffix):
        result[key] = prefix.get(value, value)
    return result

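# Illustrative example for _chain (not part of the original file): with
# prefix = {'b': 'a'} and suffix = {'c': 'b'}, the result is
# {'b': 'a', 'c': 'a'}: the copy 'b' -> 'c' is traced back through
# 'a' -> 'b' to the original source 'a'.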

def _tracefile(fctx, am, basemf):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am

    Note: we used to try and stop after a given limit; however, checking if
    that limit was reached turned out to be very expensive, so we are better
    off disabling that feature."""

    for f in fctx.ancestors():
        path = f.path()
        if am.get(path, None) == f.filenode():
            return path
        if basemf and basemf.get(path, None) == f.filenode():
            return path


def _dirstatecopies(repo, match=None):
    ds = repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in b'anm' or (match and not match(k)):
            del c[k]
    return c


def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)


def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource

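# Example (illustrative, values taken from `changesetsource` above):
# either of the following configurations makes usechangesetcentricalgo()
# return True:
#
#   [experimental]
#   copies.read-from = changeset-only
#
#   [experimental]
#   copies.read-from = compatibility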

def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm


def _revinfogetter(repo):
    """returns a function that returns multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    """
    cl = repo.changelog
    parents = cl.parentrevs

    if repo.filecopiesmode == b'changeset-sidedata':
        changelogrevision = cl.changelogrevision
        flags = cl.flags

        # A small cache to avoid doing the work twice for merges
        #
        # In the vast majority of cases, if we ask information for a revision
        # about 1 parent, we'll later ask it for the other. So it makes sense
        # to keep the information around when reaching the first parent of a
        # merge and dropping it after it was provided for the second parent.
        #
        # There are cases where only one parent of the merge will be walked. It
        # happens when the "destination" of the copy tracing is a descendant of
        # a new root, not common with the "source". In that case, we will only
        # walk through merge parents that are descendants of changesets common
        # between "source" and "destination".
        #
        # With the current implementation, if such changesets have copy
        # information, we'll keep them in memory until the end of
        # _changesetforwardcopies. We don't expect the case to be frequent
        # enough to matter.
        #
        # In addition, it would be possible to reach a pathological case, where
        # many first parents are met before any second parent is reached. In
        # that case the cache could grow. If this ever becomes an issue one can
        # safely introduce a maximum cache size. This would trade extra CPU/IO
        # time to save memory.
        merge_caches = {}
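        # Illustrative note (not part of the original file): for a merge
        # revision m, the revinfo(m) call reached via one parent computes
        # and stores the tuple in merge_caches; the call reached via the
        # other parent pops and reuses it, so changelogrevision(m) is only
        # parsed once.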

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & REVIDX_SIDEDATA:
                e = merge_caches.pop(rev, None)
                if e is not None:
                    return e
                c = changelogrevision(rev)
                p1copies = c.p1copies
                p2copies = c.p2copies
                removed = c.filesremoved
                if p1 != node.nullrev and p2 != node.nullrev:
                    # XXX in some cases we over-cache, IGNORE
                    merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
            else:
                p1copies = {}
                p2copies = {}
                removed = []
            return p1, p2, p1copies, p2copies, removed

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            ctx = repo[rev]
            p1copies, p2copies = ctx._copies
            removed = ctx.filesremoved()
            return p1, p2, p1copies, p2copies, removed

    return revinfo


def _changesetforwardcopies(a, b, match):
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)
    return _combinechangesetcopies(revs, children, b.rev(), revinfo, match)

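# Illustrative example for _changesetforwardcopies (not part of the
# original file): tracing copies from a = rev 1 to b = rev 4 in
#
#   1 --- 2 --- 4
#    \         /
#     3 ------'
#
# gives missingrevs = {2, 3, 4}, roots = {1} and
# children = {1: [2, 3], 2: [4], 3: [4]}; revs visits [1, 2, 3], and the
# copy information from both parents is merged when reaching rev 4.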

def _combinechangesetcopies(revs, children, targetrev, revinfo, match):
    """combine the copies information for each item of `revs`

    revs: sorted iterable of revisions to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in revs)
    revinfo(rev): a function that returns (p1, p2, p1copies, p2copies, removed)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
                # _chain makes a copy; we could avoid doing so in some
                # simple/linear cases.
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 takes precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regard to
                # potential filelog-related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    return all_copies[targetrev]

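# Illustrative note on the merge above (not part of the original file):
# if the walk from parent 1 of a merge contributes {'dst': 'a'} and the
# walk from parent 2 contributes {'dst': 'b'}, the merged result keeps
# {'dst': 'a'}, since parent 1 takes precedence in case of conflict.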

def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies


def _backwardrenames(a, b, match):
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r


def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies


def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    | o        ---> 3 commit that modifies a.txt
    | /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" is an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we keep copytracing enabled in such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)


def _isfullcopytraceable(repo, c1, base):
    """Checks whether base, source and destination are all non-public;
    if yes, we use the full copytrace algorithm for increased capabilities,
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    the number of changesets from c1 to base; if the number of changesets is
    more than the limit, the full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint(
            b'experimental', b'copytrace.sourcecommitlimit'
        )
        commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False

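# Example (illustrative, not part of the original file): with
#
#   [experimental]
#   copytrace.sourcecommitlimit = 100
#
# _isfullcopytraceable() returns True only when `base::c1` contains fewer
# than 100 changesets (and both c1 and base are mutable).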

def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif src not in mb:
        # Work around the "short-circuit to avoid issues with merge states"
        # thing in pathcopies(): pathcopies(x, y) can return a copy where the
        # destination doesn't exist in y.
        pass
-    elif m2[src] != mb[src]:
-        if not _related(c2[src], base[src]):
-            return
+    elif mb[src] != m2[src] and not _related(c2[src], base[src]):
+        return
+    elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
        # modified on side 2
        for dst in dsts1:
            copy[dst] = src

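# Note on the change above: previously the copy was recorded only when the
# file content differed between the merge base (mb) and side 2 (m2) and the
# two contents were related. The unrelated-content bail-out is now folded
# into its own `elif`, and the second `elif` also fires when only the flags
# differ (mb.flags(src) != m2.flags(src)), so a flag-only change (e.g. the
# executable bit) on side 2 is now enough to consider the rename relevant
# to the merge.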

class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source, present in one context but not the other,
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        self.copy = {} if copy is None else copy
        self.renamedelete = {} if renamedelete is None else renamedelete
        self.dirmove = {} if dirmove is None else dirmove
        self.movewithdir = {} if movewithdir is None else movewithdir


def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return branch_copies(), branch_copies(), {}

    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)
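    # Illustrative note (not part of the original file): the mappings are
    # inverted so that each source maps to all of its destinations, e.g.
    # copies1 = {'b': 'a', 'c': 'a'} becomes
    # inversecopies1 = {'a': ['b', 'c']}.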

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a', then 'c' is the common destination
                # and the rename is not treated as divergent.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    if repo.ui.debugflag:
        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b" on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge


def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfiles: added files on the other side (compared to ctx)
    """
    # generate a directory move map
    d = ctx.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d and ddst in d:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d, invalid

    if not dirmove:
        return {}, {}

    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfiles:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b" pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return dirmove, movewithdir

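# Illustrative example for _dir_renames (not part of the original file):
# if fullcopy is {'b/a.txt': 'a/a.txt', 'b/c.txt': 'a/c.txt'} and the
# directory 'a/' no longer exists in ctx, dirmove becomes {'a/': 'b/'};
# a file 'a/new.txt' added on the other side is then scheduled to move
# to 'b/new.txt' via movewithdir.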

def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    The source branch is the set of commits from base up to c2, not
    including base.

    If a merge is involved it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very
    large in number and that will make the algorithm slow. The number of
    possible candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    copies1 = {}
    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
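            # Illustrative note (not part of the original file): if
            # 'dir/a.txt' is missing, any file new in c1 with the same
            # basename (e.g. 'otherdir/a.txt') or in the same directory
            # (e.g. 'dir/b.txt') is a move candidate.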
842 # f is guaranteed to be present in c2, that's why
842 # f is guaranteed to be present in c2, that's why
843 # c2.filectx(f) won't fail
843 # c2.filectx(f) won't fail
844 f2 = c2.filectx(f)
844 f2 = c2.filectx(f)
845 # we can have a lot of candidates which can slow down the heuristics
845 # we can have a lot of candidates which can slow down the heuristics
846 # config value to limit the number of candidates moves to check
846 # config value to limit the number of candidates moves to check
847 maxcandidates = repo.ui.configint(
847 maxcandidates = repo.ui.configint(
848 b'experimental', b'copytrace.movecandidateslimit'
848 b'experimental', b'copytrace.movecandidateslimit'
849 )
849 )
850
850
851 if len(movecandidates) > maxcandidates:
851 if len(movecandidates) > maxcandidates:
852 repo.ui.status(
852 repo.ui.status(
853 _(
853 _(
854 b"skipping copytracing for '%s', more "
854 b"skipping copytracing for '%s', more "
855 b"candidates than the limit: %d\n"
855 b"candidates than the limit: %d\n"
856 )
856 )
857 % (f, len(movecandidates))
857 % (f, len(movecandidates))
858 )
858 )
859 continue
859 continue
860
860
861 for candidate in movecandidates:
861 for candidate in movecandidates:
862 f1 = c1.filectx(candidate)
862 f1 = c1.filectx(candidate)
863 if _related(f1, f2):
863 if _related(f1, f2):
864 # if there are a few related copies then we'll merge
864 # if there are a few related copies then we'll merge
865 # changes into all of them. This matches the behaviour
865 # changes into all of them. This matches the behaviour
866 # of upstream copytracing
866 # of upstream copytracing
867 copies1[candidate] = f
867 copies1[candidate] = f
868
868
869 return branch_copies(copies1), branch_copies(copies2), {}
869 return branch_copies(copies1), branch_copies(copies2), {}
870
870
871
871
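# Illustration (not part of the original module): the candidate grouping
# above in miniature, with plain path strings standing in for manifest
# entries. All paths below are invented for the demo.
import collections as _collections
import os as _os

_newfiles = [b'src/util.py', b'docs/util.py', b'src/helpers.py']
_bybasename = _collections.defaultdict(list)
_bydirname = _collections.defaultdict(list)
for _f in _newfiles:
    _bybasename[_os.path.basename(_f)].append(_f)
    _bydirname[_os.path.dirname(_f)].append(_f)

# a file present in base and c2 but gone from the local manifest:
_missing = b'lib/util.py'
_candidates = (
    _bybasename[_os.path.basename(_missing)]
    + _bydirname[_os.path.dirname(_missing)]
)
# the two files sharing the basename are the move candidates
assert _candidates == [b'src/util.py', b'docs/util.py']

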
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to the common ancestor to see if the two files originate
    from the same file. Since a workingfilectx's rev() is None, it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True  # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else:  # f1 and f2 point to files in the same linkrev
                return f1 == f2  # true if they point to the same file
    except StopIteration:
        return False


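# Illustration (not part of the original module): a toy model of the walk
# in _related() above. Each "file" is a chain of (linkrev, origin) pairs in
# decreasing linkrev order; we step whichever side has the larger linkrev
# until both sit on the same revision. The data below is invented.
def _toyrelated(chain1, chain2):
    g1, g2 = iter(chain1), iter(chain2)
    try:
        f1, f2 = next(g1), next(g2)
        while True:
            if f1[0] > f2[0]:
                f1 = next(g1)
            elif f2[0] > f1[0]:
                f2 = next(g2)
            else:
                return f1 == f2  # same linkrev: same origin?
    except StopIteration:
        return False


# 'b' was copied from 'a' at rev 3, so both chains meet at (3, 'a'):
assert _toyrelated(
    [(5, b'a'), (3, b'a'), (1, b'a')], [(4, b'b'), (3, b'a'), (1, b'a')]
)

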
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between
    base and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy
    (instead of just returning information about the copies). That makes it
    cheaper (especially in the common case of base==ctx.p1()) and useful
    also when experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't
    modified on the local side. This function adds the copies that were
    "missed" by merge.update().
    """
    new_copies = pathcopies(base, ctx)
    _filter(wctx.p1(), wctx, new_copies)
    for dst, src in pycompat.iteritems(new_copies):
        wctx[dst].markcopied(src)


def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset"""
    added = []
    for f in ctx.files():
        if not any(f in p for p in ctx.parents()):
            added.append(f)
    return added


def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset"""
    removed = []
    for f in ctx.files():
        if f not in ctx:
            removed.append(f)
    return removed


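# Illustration (not part of the original module): a toy check of the
# added/removed logic above with stand-in contexts. A file counts as added
# when no parent contains it, and as removed when the changeset itself no
# longer contains it. The fake class below is purely for demonstration.
class _fakectx(object):
    def __init__(self, files, present, parents=()):
        self._files = files
        self._present = present
        self._parents = parents

    def files(self):
        return self._files

    def parents(self):
        return self._parents

    def __contains__(self, f):
        return f in self._present


_p1 = _fakectx([], {b'old.txt'})
_ctx = _fakectx([b'new.txt', b'old.txt'], {b'new.txt'}, parents=[_p1])
assert computechangesetfilesadded(_ctx) == [b'new.txt']
assert computechangesetfilesremoved(_ctx) == [b'old.txt']

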
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries
    (p1copies, p2copies).

    Each dictionary is of the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies


def encodecopies(files, copies):
    items = []
    for i, dst in enumerate(files):
        if dst in copies:
            items.append(b'%d\0%s' % (i, copies[dst]))
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)


def decodecopies(files, data):
    try:
        copies = {}
        if not data:
            return copies
        for l in data.split(b'\n'):
            strindex, src = l.split(b'\0')
            i = int(strindex)
            dst = files[i]
            copies[dst] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None


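# Illustration (not part of the original module): a round-trip through the
# two helpers above. The file list and copy mapping are invented; the file
# list must be in the same sorted order used when encoding.
_files = [b'a.txt', b'b.txt', b'c.txt']
_copies = {b'b.txt': b'a.txt'}  # b.txt was copied from a.txt
_data = encodecopies(_files, _copies)  # -> b'1\x00a.txt'
assert decodecopies(_files, _data) == _copies

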
def encodefileindices(files, subset):
    subset = set(subset)
    indices = []
    for i, f in enumerate(files):
        if f in subset:
            indices.append(b'%d' % i)
    return b'\n'.join(indices)


def decodefileindices(files, data):
    try:
        subset = []
        if not data:
            return subset
        for strindex in data.split(b'\n'):
            i = int(strindex)
            if i < 0 or i >= len(files):
                return None
            subset.append(files[i])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None


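# Illustration (not part of the original module): the index encoding is a
# newline-separated list of positions into the file list. Values below are
# invented; note that decoding returns files in file-list order.
_files = [b'a.txt', b'b.txt', b'c.txt']
_data = encodefileindices(_files, [b'c.txt', b'a.txt'])  # -> b'0\n2'
assert decodefileindices(_files, _data) == [b'a.txt', b'c.txt']

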
def _getsidedata(srcrepo, rev):
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    if any([filescopies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies, p2copies = filescopies
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata


def getsidedataadder(srcrepo, destrepo):
    use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
    if pycompat.iswindows or not use_w:
        return _get_simple_sidedata_adder(srcrepo, destrepo)
    else:
        return _get_worker_sidedata_adder(srcrepo, destrepo)


def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
    """The function used by worker processes to precompute sidedata

    It reads revision numbers from an input queue and writes
    (rev, <sidedata-map>) pairs to an output queue.

    The `None` input value is used as a stop signal.

    The `tokens` semaphore is used to avoid having too many unprocessed
    entries. Each worker needs to acquire one token before fetching a task.
    Tokens will be released by the consumer of the produced data.
    """
    tokens.acquire()
    rev = revs_queue.get()
    while rev is not None:
        data = _getsidedata(srcrepo, rev)
        sidedata_queue.put((rev, data))
        tokens.acquire()
        rev = revs_queue.get()
    # processing of `None` is completed, release the token.
    tokens.release()


BUFF_PER_WORKER = 50


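# Illustration (not part of the original module): the same token-based
# backpressure pattern in miniature. A BoundedSemaphore caps how many
# unconsumed results a worker may buffer; the consumer releases one token
# per result it drains. All names below are made up for the demo, and the
# driver only runs when this sketch is executed as a script.
import multiprocessing as _mp


def _demo_worker(inq, outq, tokens):
    tokens.acquire()  # one token per task, acquired before fetching
    task = inq.get()
    while task is not None:
        outq.put((task, task * task))  # stand-in for _getsidedata()
        tokens.acquire()
        task = inq.get()
    tokens.release()  # give back the token taken for the stop signal


if __name__ == '__main__':
    tokens = _mp.BoundedSemaphore(4)  # at most 4 buffered, unconsumed results
    inq, outq = _mp.Queue(), _mp.Queue()
    for t in range(10):
        inq.put(t)
    inq.put(None)  # the stop signal, as above
    w = _mp.Process(target=_demo_worker, args=(inq, outq, tokens))
    w.start()
    results = {}
    for _i in range(10):
        task, result = outq.get()
        tokens.release()  # consuming a result frees one slot in the buffer
        results[task] = result
    w.join()
    assert results == {t: t * t for t in range(10)}

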
def _get_worker_sidedata_adder(srcrepo, destrepo):
    """The parallel version of the sidedata computation

    This code spawns a pool of workers that precompute a buffer of sidedata
    before we actually need it"""
    # avoid circular import copies -> scmutil -> worker -> copies
    from . import worker

    nbworkers = worker._numworkers(srcrepo.ui)

    tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
    revsq = multiprocessing.Queue()
    sidedataq = multiprocessing.Queue()

    assert srcrepo.filtername is None
    # queue all tasks beforehand; revision numbers are small and it makes
    # synchronisation simpler
    #
    # Since the computation for each node can be quite expensive, the
    # overhead of using a single queue is not relevant. In practice, most
    # computations are fast but some are very expensive and dominate all the
    # other smaller costs.
    for r in srcrepo.changelog.revs():
        revsq.put(r)
    # queue the "no more tasks" markers
    for i in range(nbworkers):
        revsq.put(None)

    allworkers = []
    for i in range(nbworkers):
        args = (srcrepo, revsq, sidedataq, tokens)
        w = multiprocessing.Process(target=_sidedata_worker, args=args)
        allworkers.append(w)
        w.start()

    # dictionary to store results for revisions higher than the one we are
    # looking for. For example, if we need the sidedata map for 42 and 43 is
    # received, we shelve 43 for later use.
    staging = {}

    def sidedata_companion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            # Was the data previously shelved?
            sidedata = staging.pop(rev, None)
            if sidedata is None:
                # look at the queued results until we find the one we are
                # looking for (shelve the other ones)
                r, sidedata = sidedataq.get()
                while r != rev:
                    staging[r] = sidedata
                    r, sidedata = sidedataq.get()
            tokens.release()
        return False, (), sidedata

    return sidedata_companion


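# Illustration (not part of the original module): the shelving trick above,
# draining an out-of-order stream in order by staging early arrivals.
# Everything here is invented demo data.
def _inorder(results, expected):
    staging = {}
    for want in expected:
        data = staging.pop(want, None)
        while data is None:
            r, d = next(results)
            if r == want:
                data = d
            else:
                staging[r] = d  # shelve for later
        yield want, data


_stream = iter([(2, b'c'), (0, b'a'), (1, b'b')])
assert list(_inorder(_stream, [0, 1, 2])) == [(0, b'a'), (1, b'b'), (2, b'c')]

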
def _get_simple_sidedata_adder(srcrepo, destrepo):
    """The simple version of the sidedata computation

    It just computes the sidedata in the same thread, on request"""

    def sidedatacompanion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion


def getsidedataremover(srcrepo, destrepo):
    def sidedatacompanion(revlog, rev):
        f = ()
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                f = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, f, {}

    return sidedatacompanion
@@ -1,139 +1,135 b''
===============================================
Testing merge involving change to the exec flag
===============================================

#require execbit


Initial setup
==============


  $ hg init base-repo
  $ cd base-repo
  $ cat << EOF > a
  > 1
  > 2
  > 3
  > 4
  > 5
  > 6
  > 7
  > 8
  > 9
  > EOF
  $ touch b
  $ hg add a b
  $ hg commit -m "initial commit"
  $ cd ..

Testing merging mode change
===========================

setup

Change on one side, executable bit on the other

  $ hg clone base-repo simple-merge-repo
  updating to branch default
  2 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ cd simple-merge-repo
  $ chmod +x a
  $ hg ci -m "make a executable, no change"
  $ [ -x a ] || echo "executable bit not recorded"
  $ hg up ".^"
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ cat << EOF > a
  > 1
  > 2
  > 3
  > 4
  > 5
  > 6
  > 7
  > x
  > 9
  > EOF
  $ hg commit -m "edit end of file"
  created new head

merge them (from the update side)

  $ hg merge 'desc("make a executable, no change")'
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
  (branch merge, don't forget to commit)
  $ hg st
  M a
  $ [ -x a ] || echo "executable bit lost"

merge them (from the chmod side)

  $ hg up -C 'desc("make a executable, no change")'
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ hg merge 'desc("edit end of file")'
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
  (branch merge, don't forget to commit)
  $ hg st
  M a
  $ [ -x a ] || echo "executable bit lost"


  $ cd ..

Testing merging mode change with rename
=======================================

  $ hg clone base-repo rename-merge-repo
  updating to branch default
  2 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ cd rename-merge-repo

make "a" executable on one side

  $ chmod +x a
  $ hg status
  M a
  $ hg ci -m "make a executable"
  $ [ -x a ] || echo "executable bit not recorded"
  $ hg up ".^"
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved

make "a" renamed on the other side

  $ hg mv a z
  $ hg st --copies
  A z
    a
  R a
  $ hg ci -m "rename a to z"
  created new head

merge them (from the rename side)

  $ hg merge 'desc("make a executable")'
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
  (branch merge, don't forget to commit)
  $ hg st --copies
  M z
    a
  $ [ -x z ] || echo "executable bit lost"

merge them (from the chmod side)

  $ hg up -C 'desc("make a executable")'
  1 files updated, 0 files merged, 1 files removed, 0 files unresolved
  $ hg merge 'desc("rename a to z")'
  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
  (branch merge, don't forget to commit)
  $ hg st --copies
  M z
    a
  R a
  $ [ -x z ] || echo "executable bit lost"


  $ cd ..