##// END OF EJS Templates
sidedatacopies: fast path data fetching if revision has no sidedata...
marmoute -
r43591:8a292526 default
parent child Browse files
Show More
@@ -1,968 +1,974 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16
16
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
18
18
19 from . import (
19 from . import (
20 error,
20 error,
21 match as matchmod,
21 match as matchmod,
22 node,
22 node,
23 pathutil,
23 pathutil,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28 from .revlogutils import sidedata as sidedatamod
28 from .revlogutils import sidedata as sidedatamod
29
29
30 from .utils import stringutil
30 from .utils import stringutil
31
31
32
32
33 def _filter(src, dst, t):
33 def _filter(src, dst, t):
34 """filters out invalid copies after chaining"""
34 """filters out invalid copies after chaining"""
35
35
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 # in the following table (not including trivial cases). For example, case 2
38 # in the following table (not including trivial cases). For example, case 2
39 # is where a file existed in 'src' and remained under that name in 'mid' and
39 # is where a file existed in 'src' and remained under that name in 'mid' and
40 # then was renamed between 'mid' and 'dst'.
40 # then was renamed between 'mid' and 'dst'.
41 #
41 #
42 # case src mid dst result
42 # case src mid dst result
43 # 1 x y - -
43 # 1 x y - -
44 # 2 x y y x->y
44 # 2 x y y x->y
45 # 3 x y x -
45 # 3 x y x -
46 # 4 x y z x->z
46 # 4 x y z x->z
47 # 5 - x y -
47 # 5 - x y -
48 # 6 x x y x->y
48 # 6 x x y x->y
49 #
49 #
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 # between 5 and 6, so it includes all cases in its result.
52 # between 5 and 6, so it includes all cases in its result.
53 # Cases 1, 3, and 5 are then removed by _filter().
53 # Cases 1, 3, and 5 are then removed by _filter().
54
54
55 for k, v in list(t.items()):
55 for k, v in list(t.items()):
56 # remove copies from files that didn't exist
56 # remove copies from files that didn't exist
57 if v not in src:
57 if v not in src:
58 del t[k]
58 del t[k]
59 # remove criss-crossed copies
59 # remove criss-crossed copies
60 elif k in src and v in dst:
60 elif k in src and v in dst:
61 del t[k]
61 del t[k]
62 # remove copies to files that were then removed
62 # remove copies to files that were then removed
63 elif k not in dst:
63 elif k not in dst:
64 del t[k]
64 del t[k]
65
65
66
66
def _chain(a, b):
    """Compose the copy mapping ``b`` on top of the copy mapping ``a``."""
    composed = a.copy()
    for dest, source in pycompat.iteritems(b):
        # If 'b' copies from a destination produced by 'a', follow the
        # chain back to the original source; otherwise keep the copy as-is.
        composed[dest] = composed[source] if source in composed else source
    return composed
76
76
77
77
78 def _tracefile(fctx, am, basemf):
78 def _tracefile(fctx, am, basemf):
79 """return file context that is the ancestor of fctx present in ancestor
79 """return file context that is the ancestor of fctx present in ancestor
80 manifest am
80 manifest am
81
81
82 Note: we used to try and stop after a given limit, however checking if that
82 Note: we used to try and stop after a given limit, however checking if that
83 limit is reached turned out to be very expensive. we are better off
83 limit is reached turned out to be very expensive. we are better off
84 disabling that feature."""
84 disabling that feature."""
85
85
86 for f in fctx.ancestors():
86 for f in fctx.ancestors():
87 path = f.path()
87 path = f.path()
88 if am.get(path, None) == f.filenode():
88 if am.get(path, None) == f.filenode():
89 return path
89 return path
90 if basemf and basemf.get(path, None) == f.filenode():
90 if basemf and basemf.get(path, None) == f.filenode():
91 return path
91 return path
92
92
93
93
94 def _dirstatecopies(repo, match=None):
94 def _dirstatecopies(repo, match=None):
95 ds = repo.dirstate
95 ds = repo.dirstate
96 c = ds.copies().copy()
96 c = ds.copies().copy()
97 for k in list(c):
97 for k in list(c):
98 if ds[k] not in b'anm' or (match and not match(k)):
98 if ds[k] not in b'anm' or (match and not match(k)):
99 del c[k]
99 del c[k]
100 return c
100 return c
101
101
102
102
103 def _computeforwardmissing(a, b, match=None):
103 def _computeforwardmissing(a, b, match=None):
104 """Computes which files are in b but not a.
104 """Computes which files are in b but not a.
105 This is its own function so extensions can easily wrap this call to see what
105 This is its own function so extensions can easily wrap this call to see what
106 files _forwardcopies is about to process.
106 files _forwardcopies is about to process.
107 """
107 """
108 ma = a.manifest()
108 ma = a.manifest()
109 mb = b.manifest()
109 mb = b.manifest()
110 return mb.filesnotin(ma, match=match)
110 return mb.filesnotin(ma, match=match)
111
111
112
112
def usechangesetcentricalgo(repo):
    """Return True if changeset-centric copy algorithms should be used."""
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    # both of these read-from sources rely on changeset-stored copy data
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
120
120
121
121
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy).

    Returns a {dst-path: src-path} mapping of copies from `a` to `b`.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # shared ancestry context speeds up the repeated fctx.ancestors() walks
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
179
179
180
180
def _revinfogetter(repo):
    """return a function that returns multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    """
    cl = repo.changelog
    parents = cl.parentrevs

    if repo.filecopiesmode == b'changeset-sidedata':
        changelogrevision = cl.changelogrevision
        flags = cl.flags

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & REVIDX_SIDEDATA:
                c = changelogrevision(rev)
                p1copies = c.p1copies
                p2copies = c.p2copies
                removed = c.filesremoved
            else:
                # fast path: a revision without the sidedata flag cannot
                # carry any copy information, so skip the (expensive)
                # changelogrevision lookup entirely
                p1copies = {}
                p2copies = {}
                removed = ()
            return p1, p2, p1copies, p2copies, removed

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            ctx = repo[rev]
            p1copies, p2copies = ctx._copies
            removed = ctx.filesremoved()
            return p1, p2, p1copies, p2copies, removed

    return revinfo
214
220
215
221
def _changesetforwardcopies(a, b, match):
    """Find {dst@b: src@a} copies using changeset-stored copy metadata.

    Walks every revision that is an ancestor of `b` but not of `a`,
    parents before children, chaining per-changeset copy information
    down until `b` itself is reached.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    # build a parent-rev -> list-of-children map over the missing range
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)

    roots = set(children) - set(missingrevs)
    work = list(roots)
    all_copies = {r: {} for r in roots}
    # a min-heap of revision numbers guarantees parents are processed
    # before their children (child revs are always numerically larger)
    heapq.heapify(work)
    alwaysmatch = match.always()
    while work:
        r = heapq.heappop(work)
        copies = all_copies.pop(r)
        if r == b.rev():
            return copies
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed = revinfo(c)
            # pick the copy data relative to the parent we came from
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            # Copy the dict only if later iterations will also need it
            if i != len(children[r]) - 1:
                newcopies = copies.copy()
            else:
                newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
            # a file removed in 'c' breaks any copy chain through it
            for f in removed:
                if f in newcopies:
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                heapq.heappush(work, c)
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    # unreachable: the loop must return once b.rev() is popped
    assert False
291
297
292
298
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is None:
        # working copy: combine the committed copies up to b's first
        # parent with the pending copies recorded in the dirstate
        committed = _committedforwardcopies(a, b.p1(), base, match)
        return _chain(committed, _dirstatecopies(b._repo, match))
    return _committedforwardcopies(a, b, base, match)
307
313
308
314
def _backwardrenames(a, b, match):
    """Find renames from ``b`` back to its descendant ``a``."""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    backward = {}
    for dest, source in sorted(pycompat.iteritems(forward)):
        if match and not match(source):
            continue
        if source in a:
            # the source still exists in 'a': a copy, not a rename
            continue
        backward[source] = dest
    return backward
329
335
330
336
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    # trivial cases: identical endpoints or an empty side
    if x == y or not x or not y:
        return {}
    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: plain forward search
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        result = _forwardcopies(x, y, match=match)
    elif anc == y:
        # y is an ancestor of x: trace renames backwards
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        result = _backwardrenames(x, y, match=match)
    else:
        # unrelated lineages: walk back to the common ancestor,
        # then forward to y
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = x if anc.rev() != node.nullrev else None
        result = _chain(
            _backwardrenames(x, anc, match=match),
            _forwardcopies(anc, y, base, match=match),
        )
    _filter(x, y, result)
    return result
365
371
366
372
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
445
451
446
452
447 def _isfullcopytraceable(repo, c1, base):
453 def _isfullcopytraceable(repo, c1, base):
448 """ Checks that if base, source and destination are all no-public branches,
454 """ Checks that if base, source and destination are all no-public branches,
449 if yes let's use the full copytrace algorithm for increased capabilities
455 if yes let's use the full copytrace algorithm for increased capabilities
450 since it will be fast enough.
456 since it will be fast enough.
451
457
452 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
458 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
453 number of changesets from c1 to base such that if number of changesets are
459 number of changesets from c1 to base such that if number of changesets are
454 more than the limit, full copytracing algorithm won't be used.
460 more than the limit, full copytracing algorithm won't be used.
455 """
461 """
456 if c1.rev() is None:
462 if c1.rev() is None:
457 c1 = c1.p1()
463 c1 = c1.p1()
458 if c1.mutable() and base.mutable():
464 if c1.mutable() and base.mutable():
459 sourcecommitlimit = repo.ui.configint(
465 sourcecommitlimit = repo.ui.configint(
460 b'experimental', b'copytrace.sourcecommitlimit'
466 b'experimental', b'copytrace.sourcecommitlimit'
461 )
467 )
462 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
468 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
463 return commits < sourcecommitlimit
469 return commits < sourcecommitlimit
464 return False
470 return False
465
471
466
472
467 def _checksinglesidecopies(
473 def _checksinglesidecopies(
468 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
474 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
469 ):
475 ):
470 if src not in m2:
476 if src not in m2:
471 # deleted on side 2
477 # deleted on side 2
472 if src not in m1:
478 if src not in m1:
473 # renamed on side 1, deleted on side 2
479 # renamed on side 1, deleted on side 2
474 renamedelete[src] = dsts1
480 renamedelete[src] = dsts1
475 elif m2[src] != mb[src]:
481 elif m2[src] != mb[src]:
476 if not _related(c2[src], base[src]):
482 if not _related(c2[src], base[src]):
477 return
483 return
478 # modified on side 2
484 # modified on side 2
479 for dst in dsts1:
485 for dst in dsts1:
480 if dst not in m2:
486 if dst not in m2:
481 # dst not added on side 2 (handle as regular
487 # dst not added on side 2 (handle as regular
482 # "both created" case in manifestmerge otherwise)
488 # "both created" case in manifestmerge otherwise)
483 copy[dst] = src
489 copy[dst] = src
484
490
485
491
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple ``(copy, movewithdir, diverge, renamedelete, dirmove)``:

    - ``copy``: mapping of destination file -> source file to merge with
    - ``movewithdir``: files that should follow a detected directory rename
    - ``diverge``: source -> sorted list of divergent rename destinations
    - ``renamedelete``: source renamed on one side, deleted on the other
    - ``dirmove``: directory rename map (``b'src/'`` -> ``b'dst/'``)
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert the dst -> src copy maps so each source lists its destinations
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # flattened destination sets, used only for the debug output below
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        # no copies at all: nothing further to analyze
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b"   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so prefix matching below is exact
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
650
656
651
657
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same 5-tuple as _fullcopytracing(); only the first element
    (the copy map) is ever populated by the heuristics.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (read once here: the value is loop-invariant, so re-reading it for
        # every missing file was pure overhead)
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
759
765
760
766
761 def _related(f1, f2):
767 def _related(f1, f2):
762 """return True if f1 and f2 filectx have a common ancestor
768 """return True if f1 and f2 filectx have a common ancestor
763
769
764 Walk back to common ancestor to see if the two files originate
770 Walk back to common ancestor to see if the two files originate
765 from the same file. Since workingfilectx's rev() is None it messes
771 from the same file. Since workingfilectx's rev() is None it messes
766 up the integer comparison logic, hence the pre-step check for
772 up the integer comparison logic, hence the pre-step check for
767 None (f1 and f2 can only be workingfilectx's initially).
773 None (f1 and f2 can only be workingfilectx's initially).
768 """
774 """
769
775
770 if f1 == f2:
776 if f1 == f2:
771 return True # a match
777 return True # a match
772
778
773 g1, g2 = f1.ancestors(), f2.ancestors()
779 g1, g2 = f1.ancestors(), f2.ancestors()
774 try:
780 try:
775 f1r, f2r = f1.linkrev(), f2.linkrev()
781 f1r, f2r = f1.linkrev(), f2.linkrev()
776
782
777 if f1r is None:
783 if f1r is None:
778 f1 = next(g1)
784 f1 = next(g1)
779 if f2r is None:
785 if f2r is None:
780 f2 = next(g2)
786 f2 = next(g2)
781
787
782 while True:
788 while True:
783 f1r, f2r = f1.linkrev(), f2.linkrev()
789 f1r, f2r = f1.linkrev(), f2.linkrev()
784 if f1r > f2r:
790 if f1r > f2r:
785 f1 = next(g1)
791 f1 = next(g1)
786 elif f2r > f1r:
792 elif f2r > f1r:
787 f2 = next(g2)
793 f2 = next(g2)
788 else: # f1 and f2 point to files in the same linkrev
794 else: # f1 and f2 point to files in the same linkrev
789 return f1 == f2 # true if they point to the same file
795 return f1 == f2 # true if they point to the same file
790 except StopIteration:
796 except StopIteration:
791 return False
797 return False
792
798
793
799
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    # copytrace is considered enabled for 'heuristics', any truthy value,
    # or when the setting is absent/unparsable (None)
    tracingenabled = (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    )

    exclude = {}
    if skiprev is not None and tracingenabled:
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])

    allcopies = pathcopies(repo[fromrev], repo[rev])
    for dst, src in pycompat.iteritems(allcopies):
        if dst in exclude or dst not in wctx:
            continue
        wctx[dst].markcopied(src)
818
824
819
825
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset
    """
    # a file is "added" when no parent of the changeset contains it
    parents = ctx.parents()
    return [f for f in ctx.files() if not any(f in p for p in parents)]
828
834
829
835
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset
    """
    # a touched file that is absent from the changeset itself was removed
    return [f for f in ctx.files() if f not in ctx]
838
844
839
845
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionnary (p1copies, p2copies).

    Each dictionnary are in the form: `{newname: oldname}`
    """
    copiesfromp1 = {}
    copiesfromp2 = {}
    parent1 = ctx.p1()
    parent2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for newname in ctx.files():
        if not narrowmatch(newname):
            # outside the narrow spec: ignore
            continue
        if newname not in ctx:
            # file was removed in this changeset
            continue
        rename = ctx[newname].renamed()
        if not rename:
            continue
        oldname, oldnode = rename
        # attribute the copy to whichever parent actually holds the
        # recorded source filenode
        if oldname in parent1 and parent1[oldname].filenode() == oldnode:
            copiesfromp1[newname] = oldname
        elif oldname in parent2 and parent2[oldname].filenode() == oldnode:
            copiesfromp2[newname] = oldname
    return copiesfromp1, copiesfromp2
864
870
865
871
def encodecopies(files, copies):
    """Encode the `{dst: src}` map *copies* against the sorted *files* list.

    Each entry becomes ``b'<index>\\0<source>'`` where index is the
    position of the destination in *files*; entries are newline-joined.
    Raises ProgrammingError if a copy target is absent from *files*.
    """
    entries = [
        b'%d\0%s' % (index, copies[dst])
        for index, dst in enumerate(files)
        if dst in copies
    ]
    if len(entries) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(entries)
876
882
877
883
def decodecopies(files, data):
    """Decode *data* produced by encodecopies() back into a `{dst: src}` map.

    Returns None when the payload does not parse, since an extension may
    have stored something else under the same sidedata key.
    """
    try:
        copies = {}
        if not data:
            return copies
        for entry in data.split(b'\n'):
            strindex, src = entry.split(b'\0')
            copies[files[int(strindex)]] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
893
899
894
900
def encodefileindices(files, subset):
    """Encode *subset* as newline-separated indices into *files*."""
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % index for index, name in enumerate(files) if name in wanted
    )
902
908
903
909
def decodefileindices(files, data):
    """Decode data produced by encodefileindices() into a file-name list.

    Returns None when the payload does not parse or an index is out of
    range, since an extension may have stored something else under the
    same sidedata key.
    """
    try:
        if not data:
            return []
        subset = []
        for strindex in data.split(b'\n'):
            i = int(strindex)
            if 0 <= i < len(files):
                subset.append(files[i])
            else:
                # out-of-range index: treat the whole payload as foreign
                return None
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
919
925
920
926
def _getsidedata(srcrepo, rev):
    """Compute the copy-tracing sidedata for revision *rev* of *srcrepo*.

    Returns a dict mapping sidedata keys (SD_P1COPIES, SD_P2COPIES,
    SD_FILESADDED, SD_FILESREMOVED) to encoded values. Keys whose encoded
    value is empty are omitted, so a revision with no copy information
    yields an empty dict.
    """
    ctx = srcrepo[rev]
    p1copies, p2copies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    # Bug fix: the previous guard tested `any([filescopies, ...])` where
    # filescopies was the (p1copies, p2copies) tuple — a non-empty tuple
    # is always truthy, so the guard never skipped anything. Testing the
    # actual containers skips the encoding work when nothing is recorded;
    # the returned mapping is unchanged either way.
    if any([p1copies, p2copies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
943
949
944
950
def getsidedataadder(srcrepo, destrepo):
    """Return a sidedata companion that (re)computes copy-tracing data.

    The companion fills in sidedata from *srcrepo* for changelog
    revisions and leaves every other revlog untouched.
    """

    def sidedatacompanion(revlog, rev):
        # only the changelog carries a `filteredrevs` attribute
        ischangelog = util.safehasattr(revlog, 'filteredrevs')
        if ischangelog:
            sidedata = _getsidedata(srcrepo, rev)
        else:
            sidedata = {}
        return False, (), sidedata

    return sidedatacompanion
953
959
954
960
def getsidedataremover(srcrepo, destrepo):
    """Return a sidedata companion that strips copy-tracing sidedata.

    For changelog revisions that actually carry sidedata, the companion
    requests removal of the four copy-related keys; everything else is
    passed through unchanged.
    """

    def sidedatacompanion(revlog, rev):
        removals = ()
        # only the changelog carries a `filteredrevs` attribute
        if util.safehasattr(revlog, 'filteredrevs'):
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                removals = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, removals, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now