##// END OF EJS Templates
copies: move from a copy on branchpoint to a copy on write approach...
marmoute -
r43594:ffd04bc9 default
parent child Browse files
Show More
@@ -1,982 +1,986 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import os
11 import os
12
12
13 from .i18n import _
13 from .i18n import _
14
14
15
15
16 from .revlogutils.flagutil import REVIDX_SIDEDATA
16 from .revlogutils.flagutil import REVIDX_SIDEDATA
17
17
18 from . import (
18 from . import (
19 error,
19 error,
20 match as matchmod,
20 match as matchmod,
21 node,
21 node,
22 pathutil,
22 pathutil,
23 pycompat,
23 pycompat,
24 util,
24 util,
25 )
25 )
26
26
27 from .revlogutils import sidedata as sidedatamod
27 from .revlogutils import sidedata as sidedatamod
28
28
29 from .utils import stringutil
29 from .utils import stringutil
30
30
31
31
32 def _filter(src, dst, t):
32 def _filter(src, dst, t):
33 """filters out invalid copies after chaining"""
33 """filters out invalid copies after chaining"""
34
34
35 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
35 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
36 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 # in the following table (not including trivial cases). For example, case 2
37 # in the following table (not including trivial cases). For example, case 2
38 # is where a file existed in 'src' and remained under that name in 'mid' and
38 # is where a file existed in 'src' and remained under that name in 'mid' and
39 # then was renamed between 'mid' and 'dst'.
39 # then was renamed between 'mid' and 'dst'.
40 #
40 #
41 # case src mid dst result
41 # case src mid dst result
42 # 1 x y - -
42 # 1 x y - -
43 # 2 x y y x->y
43 # 2 x y y x->y
44 # 3 x y x -
44 # 3 x y x -
45 # 4 x y z x->z
45 # 4 x y z x->z
46 # 5 - x y -
46 # 5 - x y -
47 # 6 x x y x->y
47 # 6 x x y x->y
48 #
48 #
49 # _chain() takes care of chaining the copies in 'a' and 'b', but it
49 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
50 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 # between 5 and 6, so it includes all cases in its result.
51 # between 5 and 6, so it includes all cases in its result.
52 # Cases 1, 3, and 5 are then removed by _filter().
52 # Cases 1, 3, and 5 are then removed by _filter().
53
53
54 for k, v in list(t.items()):
54 for k, v in list(t.items()):
55 # remove copies from files that didn't exist
55 # remove copies from files that didn't exist
56 if v not in src:
56 if v not in src:
57 del t[k]
57 del t[k]
58 # remove criss-crossed copies
58 # remove criss-crossed copies
59 elif k in src and v in dst:
59 elif k in src and v in dst:
60 del t[k]
60 del t[k]
61 # remove copies to files that were then removed
61 # remove copies to files that were then removed
62 elif k not in dst:
62 elif k not in dst:
63 del t[k]
63 del t[k]
64
64
65
65
66 def _chain(a, b):
66 def _chain(a, b):
67 """chain two sets of copies 'a' and 'b'"""
67 """chain two sets of copies 'a' and 'b'"""
68 t = a.copy()
68 t = a.copy()
69 for k, v in pycompat.iteritems(b):
69 for k, v in pycompat.iteritems(b):
70 if v in t:
70 if v in t:
71 t[k] = t[v]
71 t[k] = t[v]
72 else:
72 else:
73 t[k] = v
73 t[k] = v
74 return t
74 return t
75
75
76
76
77 def _tracefile(fctx, am, basemf):
77 def _tracefile(fctx, am, basemf):
78 """return file context that is the ancestor of fctx present in ancestor
78 """return file context that is the ancestor of fctx present in ancestor
79 manifest am
79 manifest am
80
80
81 Note: we used to try and stop after a given limit, however checking if that
81 Note: we used to try and stop after a given limit, however checking if that
82 limit is reached turned out to be very expensive. we are better off
82 limit is reached turned out to be very expensive. we are better off
83 disabling that feature."""
83 disabling that feature."""
84
84
85 for f in fctx.ancestors():
85 for f in fctx.ancestors():
86 path = f.path()
86 path = f.path()
87 if am.get(path, None) == f.filenode():
87 if am.get(path, None) == f.filenode():
88 return path
88 return path
89 if basemf and basemf.get(path, None) == f.filenode():
89 if basemf and basemf.get(path, None) == f.filenode():
90 return path
90 return path
91
91
92
92
93 def _dirstatecopies(repo, match=None):
93 def _dirstatecopies(repo, match=None):
94 ds = repo.dirstate
94 ds = repo.dirstate
95 c = ds.copies().copy()
95 c = ds.copies().copy()
96 for k in list(c):
96 for k in list(c):
97 if ds[k] not in b'anm' or (match and not match(k)):
97 if ds[k] not in b'anm' or (match and not match(k)):
98 del c[k]
98 del c[k]
99 return c
99 return c
100
100
101
101
102 def _computeforwardmissing(a, b, match=None):
102 def _computeforwardmissing(a, b, match=None):
103 """Computes which files are in b but not a.
103 """Computes which files are in b but not a.
104 This is its own function so extensions can easily wrap this call to see what
104 This is its own function so extensions can easily wrap this call to see what
105 files _forwardcopies is about to process.
105 files _forwardcopies is about to process.
106 """
106 """
107 ma = a.manifest()
107 ma = a.manifest()
108 mb = b.manifest()
108 mb = b.manifest()
109 return mb.filesnotin(ma, match=match)
109 return mb.filesnotin(ma, match=match)
110
110
111
111
112 def usechangesetcentricalgo(repo):
112 def usechangesetcentricalgo(repo):
113 """Checks if we should use changeset-centric copy algorithms"""
113 """Checks if we should use changeset-centric copy algorithms"""
114 if repo.filecopiesmode == b'changeset-sidedata':
114 if repo.filecopiesmode == b'changeset-sidedata':
115 return True
115 return True
116 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
116 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
117 changesetsource = (b'changeset-only', b'compatibility')
117 changesetsource = (b'changeset-only', b'compatibility')
118 return readfrom in changesetsource
118 return readfrom in changesetsource
119
119
120
120
121 def _committedforwardcopies(a, b, base, match):
121 def _committedforwardcopies(a, b, base, match):
122 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
122 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
123 # files might have to be traced back to the fctx parent of the last
123 # files might have to be traced back to the fctx parent of the last
124 # one-side-only changeset, but not further back than that
124 # one-side-only changeset, but not further back than that
125 repo = a._repo
125 repo = a._repo
126
126
127 if usechangesetcentricalgo(repo):
127 if usechangesetcentricalgo(repo):
128 return _changesetforwardcopies(a, b, match)
128 return _changesetforwardcopies(a, b, match)
129
129
130 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
130 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
131 dbg = repo.ui.debug
131 dbg = repo.ui.debug
132 if debug:
132 if debug:
133 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
133 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
134 am = a.manifest()
134 am = a.manifest()
135 basemf = None if base is None else base.manifest()
135 basemf = None if base is None else base.manifest()
136
136
137 # find where new files came from
137 # find where new files came from
138 # we currently don't try to find where old files went, too expensive
138 # we currently don't try to find where old files went, too expensive
139 # this means we can miss a case like 'hg rm b; hg cp a b'
139 # this means we can miss a case like 'hg rm b; hg cp a b'
140 cm = {}
140 cm = {}
141
141
142 # Computing the forward missing is quite expensive on large manifests, since
142 # Computing the forward missing is quite expensive on large manifests, since
143 # it compares the entire manifests. We can optimize it in the common use
143 # it compares the entire manifests. We can optimize it in the common use
144 # case of computing what copies are in a commit versus its parent (like
144 # case of computing what copies are in a commit versus its parent (like
145 # during a rebase or histedit). Note, we exclude merge commits from this
145 # during a rebase or histedit). Note, we exclude merge commits from this
146 # optimization, since the ctx.files() for a merge commit is not correct for
146 # optimization, since the ctx.files() for a merge commit is not correct for
147 # this comparison.
147 # this comparison.
148 forwardmissingmatch = match
148 forwardmissingmatch = match
149 if b.p1() == a and b.p2().node() == node.nullid:
149 if b.p1() == a and b.p2().node() == node.nullid:
150 filesmatcher = matchmod.exact(b.files())
150 filesmatcher = matchmod.exact(b.files())
151 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
151 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
152 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
152 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
153
153
154 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
154 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
155
155
156 if debug:
156 if debug:
157 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
157 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
158
158
159 for f in sorted(missing):
159 for f in sorted(missing):
160 if debug:
160 if debug:
161 dbg(b'debug.copies: tracing file: %s\n' % f)
161 dbg(b'debug.copies: tracing file: %s\n' % f)
162 fctx = b[f]
162 fctx = b[f]
163 fctx._ancestrycontext = ancestrycontext
163 fctx._ancestrycontext = ancestrycontext
164
164
165 if debug:
165 if debug:
166 start = util.timer()
166 start = util.timer()
167 opath = _tracefile(fctx, am, basemf)
167 opath = _tracefile(fctx, am, basemf)
168 if opath:
168 if opath:
169 if debug:
169 if debug:
170 dbg(b'debug.copies: rename of: %s\n' % opath)
170 dbg(b'debug.copies: rename of: %s\n' % opath)
171 cm[f] = opath
171 cm[f] = opath
172 if debug:
172 if debug:
173 dbg(
173 dbg(
174 b'debug.copies: time: %f seconds\n'
174 b'debug.copies: time: %f seconds\n'
175 % (util.timer() - start)
175 % (util.timer() - start)
176 )
176 )
177 return cm
177 return cm
178
178
179
179
180 def _revinfogetter(repo):
180 def _revinfogetter(repo):
181 """return a function that return multiple data given a <rev>"i
181 """return a function that return multiple data given a <rev>"i
182
182
183 * p1: revision number of first parent
183 * p1: revision number of first parent
184 * p2: revision number of first parent
184 * p2: revision number of first parent
185 * p1copies: mapping of copies from p1
185 * p1copies: mapping of copies from p1
186 * p2copies: mapping of copies from p2
186 * p2copies: mapping of copies from p2
187 * removed: a list of removed files
187 * removed: a list of removed files
188 """
188 """
189 cl = repo.changelog
189 cl = repo.changelog
190 parents = cl.parentrevs
190 parents = cl.parentrevs
191
191
192 if repo.filecopiesmode == b'changeset-sidedata':
192 if repo.filecopiesmode == b'changeset-sidedata':
193 changelogrevision = cl.changelogrevision
193 changelogrevision = cl.changelogrevision
194 flags = cl.flags
194 flags = cl.flags
195
195
196 def revinfo(rev):
196 def revinfo(rev):
197 p1, p2 = parents(rev)
197 p1, p2 = parents(rev)
198 if flags(rev) & REVIDX_SIDEDATA:
198 if flags(rev) & REVIDX_SIDEDATA:
199 c = changelogrevision(rev)
199 c = changelogrevision(rev)
200 p1copies = c.p1copies
200 p1copies = c.p1copies
201 p2copies = c.p2copies
201 p2copies = c.p2copies
202 removed = c.filesremoved
202 removed = c.filesremoved
203 else:
203 else:
204 p1copies = {}
204 p1copies = {}
205 p2copies = {}
205 p2copies = {}
206 removed = ()
206 removed = ()
207 return p1, p2, p1copies, p2copies, removed
207 return p1, p2, p1copies, p2copies, removed
208
208
209 else:
209 else:
210
210
211 def revinfo(rev):
211 def revinfo(rev):
212 p1, p2 = parents(rev)
212 p1, p2 = parents(rev)
213 ctx = repo[rev]
213 ctx = repo[rev]
214 p1copies, p2copies = ctx._copies
214 p1copies, p2copies = ctx._copies
215 removed = ctx.filesremoved()
215 removed = ctx.filesremoved()
216 return p1, p2, p1copies, p2copies, removed
216 return p1, p2, p1copies, p2copies, removed
217
217
218 return revinfo
218 return revinfo
219
219
220
220
221 def _changesetforwardcopies(a, b, match):
221 def _changesetforwardcopies(a, b, match):
222 if a.rev() in (node.nullrev, b.rev()):
222 if a.rev() in (node.nullrev, b.rev()):
223 return {}
223 return {}
224
224
225 repo = a.repo().unfiltered()
225 repo = a.repo().unfiltered()
226 children = {}
226 children = {}
227 revinfo = _revinfogetter(repo)
227 revinfo = _revinfogetter(repo)
228
228
229 cl = repo.changelog
229 cl = repo.changelog
230 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
230 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
231 mrset = set(missingrevs)
231 mrset = set(missingrevs)
232 roots = set()
232 roots = set()
233 for r in missingrevs:
233 for r in missingrevs:
234 for p in cl.parentrevs(r):
234 for p in cl.parentrevs(r):
235 if p == node.nullrev:
235 if p == node.nullrev:
236 continue
236 continue
237 if p not in children:
237 if p not in children:
238 children[p] = [r]
238 children[p] = [r]
239 else:
239 else:
240 children[p].append(r)
240 children[p].append(r)
241 if p not in mrset:
241 if p not in mrset:
242 roots.add(p)
242 roots.add(p)
243 if not roots:
243 if not roots:
244 # no common revision to track copies from
244 # no common revision to track copies from
245 return {}
245 return {}
246 min_root = min(roots)
246 min_root = min(roots)
247
247
248 from_head = set(
248 from_head = set(
249 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
249 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
250 )
250 )
251
251
252 iterrevs = set(from_head)
252 iterrevs = set(from_head)
253 iterrevs &= mrset
253 iterrevs &= mrset
254 iterrevs.update(roots)
254 iterrevs.update(roots)
255 iterrevs.remove(b.rev())
255 iterrevs.remove(b.rev())
256 all_copies = {r: {} for r in roots}
256 all_copies = {r: {} for r in roots}
257 alwaysmatch = match.always()
257 alwaysmatch = match.always()
258 for r in sorted(iterrevs):
258 for r in sorted(iterrevs):
259 copies = all_copies.pop(r)
259 copies = all_copies.pop(r)
260 for i, c in enumerate(children[r]):
260 for i, c in enumerate(children[r]):
261 p1, p2, p1copies, p2copies, removed = revinfo(c)
261 p1, p2, p1copies, p2copies, removed = revinfo(c)
262 if r == p1:
262 if r == p1:
263 parent = 1
263 parent = 1
264 childcopies = p1copies
264 childcopies = p1copies
265 else:
265 else:
266 assert r == p2
266 assert r == p2
267 parent = 2
267 parent = 2
268 childcopies = p2copies
268 childcopies = p2copies
269 if not alwaysmatch:
269 if not alwaysmatch:
270 childcopies = {
270 childcopies = {
271 dst: src for dst, src in childcopies.items() if match(dst)
271 dst: src for dst, src in childcopies.items() if match(dst)
272 }
272 }
273 # Copy the dict only if later iterations will also need it
274 if i != len(children[r]) - 1:
275 newcopies = copies.copy()
276 else:
277 newcopies = copies
273 newcopies = copies
278 if childcopies:
274 if childcopies:
279 newcopies = _chain(newcopies, childcopies)
275 newcopies = _chain(newcopies, childcopies)
276 # _chain makes a copies, we can avoid doing so in some
277 # simple/linear cases.
278 assert newcopies is not copies
280 for f in removed:
279 for f in removed:
281 if f in newcopies:
280 if f in newcopies:
281 if newcopies is copies:
282 # copy on write to avoid affecting potential other
283 # branches. when there are no other branches, this
284 # could be avoided.
285 newcopies = copies.copy()
282 del newcopies[f]
286 del newcopies[f]
283 othercopies = all_copies.get(c)
287 othercopies = all_copies.get(c)
284 if othercopies is None:
288 if othercopies is None:
285 all_copies[c] = newcopies
289 all_copies[c] = newcopies
286 else:
290 else:
287 # we are the second parent to work on c, we need to merge our
291 # we are the second parent to work on c, we need to merge our
288 # work with the other.
292 # work with the other.
289 #
293 #
290 # Unlike when copies are stored in the filelog, we consider
294 # Unlike when copies are stored in the filelog, we consider
291 # it a copy even if the destination already existed on the
295 # it a copy even if the destination already existed on the
292 # other branch. It's simply too expensive to check if the
296 # other branch. It's simply too expensive to check if the
293 # file existed in the manifest.
297 # file existed in the manifest.
294 #
298 #
295 # In case of conflict, parent 1 take precedence over parent 2.
299 # In case of conflict, parent 1 take precedence over parent 2.
296 # This is an arbitrary choice made anew when implementing
300 # This is an arbitrary choice made anew when implementing
297 # changeset based copies. It was made without regards with
301 # changeset based copies. It was made without regards with
298 # potential filelog related behavior.
302 # potential filelog related behavior.
299 if parent == 1:
303 if parent == 1:
300 othercopies.update(newcopies)
304 othercopies.update(newcopies)
301 else:
305 else:
302 newcopies.update(othercopies)
306 newcopies.update(othercopies)
303 all_copies[c] = newcopies
307 all_copies[c] = newcopies
304 return all_copies[b.rev()]
308 return all_copies[b.rev()]
305
309
306
310
307 def _forwardcopies(a, b, base=None, match=None):
311 def _forwardcopies(a, b, base=None, match=None):
308 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
312 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
309
313
310 if base is None:
314 if base is None:
311 base = a
315 base = a
312 match = a.repo().narrowmatch(match)
316 match = a.repo().narrowmatch(match)
313 # check for working copy
317 # check for working copy
314 if b.rev() is None:
318 if b.rev() is None:
315 cm = _committedforwardcopies(a, b.p1(), base, match)
319 cm = _committedforwardcopies(a, b.p1(), base, match)
316 # combine copies from dirstate if necessary
320 # combine copies from dirstate if necessary
317 copies = _chain(cm, _dirstatecopies(b._repo, match))
321 copies = _chain(cm, _dirstatecopies(b._repo, match))
318 else:
322 else:
319 copies = _committedforwardcopies(a, b, base, match)
323 copies = _committedforwardcopies(a, b, base, match)
320 return copies
324 return copies
321
325
322
326
323 def _backwardrenames(a, b, match):
327 def _backwardrenames(a, b, match):
324 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
328 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
325 return {}
329 return {}
326
330
327 # Even though we're not taking copies into account, 1:n rename situations
331 # Even though we're not taking copies into account, 1:n rename situations
328 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
332 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
329 # arbitrarily pick one of the renames.
333 # arbitrarily pick one of the renames.
330 # We don't want to pass in "match" here, since that would filter
334 # We don't want to pass in "match" here, since that would filter
331 # the destination by it. Since we're reversing the copies, we want
335 # the destination by it. Since we're reversing the copies, we want
332 # to filter the source instead.
336 # to filter the source instead.
333 f = _forwardcopies(b, a)
337 f = _forwardcopies(b, a)
334 r = {}
338 r = {}
335 for k, v in sorted(pycompat.iteritems(f)):
339 for k, v in sorted(pycompat.iteritems(f)):
336 if match and not match(v):
340 if match and not match(v):
337 continue
341 continue
338 # remove copies
342 # remove copies
339 if v in a:
343 if v in a:
340 continue
344 continue
341 r[v] = k
345 r[v] = k
342 return r
346 return r
343
347
344
348
345 def pathcopies(x, y, match=None):
349 def pathcopies(x, y, match=None):
346 """find {dst@y: src@x} copy mapping for directed compare"""
350 """find {dst@y: src@x} copy mapping for directed compare"""
347 repo = x._repo
351 repo = x._repo
348 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
352 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
349 if debug:
353 if debug:
350 repo.ui.debug(
354 repo.ui.debug(
351 b'debug.copies: searching copies from %s to %s\n' % (x, y)
355 b'debug.copies: searching copies from %s to %s\n' % (x, y)
352 )
356 )
353 if x == y or not x or not y:
357 if x == y or not x or not y:
354 return {}
358 return {}
355 a = y.ancestor(x)
359 a = y.ancestor(x)
356 if a == x:
360 if a == x:
357 if debug:
361 if debug:
358 repo.ui.debug(b'debug.copies: search mode: forward\n')
362 repo.ui.debug(b'debug.copies: search mode: forward\n')
359 if y.rev() is None and x == y.p1():
363 if y.rev() is None and x == y.p1():
360 # short-circuit to avoid issues with merge states
364 # short-circuit to avoid issues with merge states
361 return _dirstatecopies(repo, match)
365 return _dirstatecopies(repo, match)
362 copies = _forwardcopies(x, y, match=match)
366 copies = _forwardcopies(x, y, match=match)
363 elif a == y:
367 elif a == y:
364 if debug:
368 if debug:
365 repo.ui.debug(b'debug.copies: search mode: backward\n')
369 repo.ui.debug(b'debug.copies: search mode: backward\n')
366 copies = _backwardrenames(x, y, match=match)
370 copies = _backwardrenames(x, y, match=match)
367 else:
371 else:
368 if debug:
372 if debug:
369 repo.ui.debug(b'debug.copies: search mode: combined\n')
373 repo.ui.debug(b'debug.copies: search mode: combined\n')
370 base = None
374 base = None
371 if a.rev() != node.nullrev:
375 if a.rev() != node.nullrev:
372 base = x
376 base = x
373 copies = _chain(
377 copies = _chain(
374 _backwardrenames(x, a, match=match),
378 _backwardrenames(x, a, match=match),
375 _forwardcopies(a, y, base, match=match),
379 _forwardcopies(a, y, base, match=match),
376 )
380 )
377 _filter(x, y, copies)
381 _filter(x, y, copies)
378 return copies
382 return copies
379
383
380
384
381 def mergecopies(repo, c1, c2, base):
385 def mergecopies(repo, c1, c2, base):
382 """
386 """
383 Finds moves and copies between context c1 and c2 that are relevant for
387 Finds moves and copies between context c1 and c2 that are relevant for
384 merging. 'base' will be used as the merge base.
388 merging. 'base' will be used as the merge base.
385
389
386 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
390 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
387 files that were moved/ copied in one merge parent and modified in another.
391 files that were moved/ copied in one merge parent and modified in another.
388 For example:
392 For example:
389
393
390 o ---> 4 another commit
394 o ---> 4 another commit
391 |
395 |
392 | o ---> 3 commit that modifies a.txt
396 | o ---> 3 commit that modifies a.txt
393 | /
397 | /
394 o / ---> 2 commit that moves a.txt to b.txt
398 o / ---> 2 commit that moves a.txt to b.txt
395 |/
399 |/
396 o ---> 1 merge base
400 o ---> 1 merge base
397
401
398 If we try to rebase revision 3 on revision 4, since there is no a.txt in
402 If we try to rebase revision 3 on revision 4, since there is no a.txt in
399 revision 4, and if user have copytrace disabled, we prints the following
403 revision 4, and if user have copytrace disabled, we prints the following
400 message:
404 message:
401
405
402 ```other changed <file> which local deleted```
406 ```other changed <file> which local deleted```
403
407
404 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
408 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
405 "dirmove".
409 "dirmove".
406
410
407 "copy" is a mapping from destination name -> source name,
411 "copy" is a mapping from destination name -> source name,
408 where source is in c1 and destination is in c2 or vice-versa.
412 where source is in c1 and destination is in c2 or vice-versa.
409
413
410 "movewithdir" is a mapping from source name -> destination name,
414 "movewithdir" is a mapping from source name -> destination name,
411 where the file at source present in one context but not the other
415 where the file at source present in one context but not the other
412 needs to be moved to destination by the merge process, because the
416 needs to be moved to destination by the merge process, because the
413 other context moved the directory it is in.
417 other context moved the directory it is in.
414
418
415 "diverge" is a mapping of source name -> list of destination names
419 "diverge" is a mapping of source name -> list of destination names
416 for divergent renames.
420 for divergent renames.
417
421
418 "renamedelete" is a mapping of source name -> list of destination
422 "renamedelete" is a mapping of source name -> list of destination
419 names for files deleted in c1 that were renamed in c2 or vice-versa.
423 names for files deleted in c1 that were renamed in c2 or vice-versa.
420
424
421 "dirmove" is a mapping of detected source dir -> destination dir renames.
425 "dirmove" is a mapping of detected source dir -> destination dir renames.
422 This is needed for handling changes to new files previously grafted into
426 This is needed for handling changes to new files previously grafted into
423 renamed directories.
427 renamed directories.
424
428
425 This function calls different copytracing algorithms based on config.
429 This function calls different copytracing algorithms based on config.
426 """
430 """
427 # avoid silly behavior for update from empty dir
431 # avoid silly behavior for update from empty dir
428 if not c1 or not c2 or c1 == c2:
432 if not c1 or not c2 or c1 == c2:
429 return {}, {}, {}, {}, {}
433 return {}, {}, {}, {}, {}
430
434
431 narrowmatch = c1.repo().narrowmatch()
435 narrowmatch = c1.repo().narrowmatch()
432
436
433 # avoid silly behavior for parent -> working dir
437 # avoid silly behavior for parent -> working dir
434 if c2.node() is None and c1.node() == repo.dirstate.p1():
438 if c2.node() is None and c1.node() == repo.dirstate.p1():
435 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
439 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
436
440
437 copytracing = repo.ui.config(b'experimental', b'copytrace')
441 copytracing = repo.ui.config(b'experimental', b'copytrace')
438 if stringutil.parsebool(copytracing) is False:
442 if stringutil.parsebool(copytracing) is False:
439 # stringutil.parsebool() returns None when it is unable to parse the
443 # stringutil.parsebool() returns None when it is unable to parse the
440 # value, so we should rely on making sure copytracing is on such cases
444 # value, so we should rely on making sure copytracing is on such cases
441 return {}, {}, {}, {}, {}
445 return {}, {}, {}, {}, {}
442
446
443 if usechangesetcentricalgo(repo):
447 if usechangesetcentricalgo(repo):
444 # The heuristics don't make sense when we need changeset-centric algos
448 # The heuristics don't make sense when we need changeset-centric algos
445 return _fullcopytracing(repo, c1, c2, base)
449 return _fullcopytracing(repo, c1, c2, base)
446
450
447 # Copy trace disabling is explicitly below the node == p1 logic above
451 # Copy trace disabling is explicitly below the node == p1 logic above
448 # because the logic above is required for a simple copy to be kept across a
452 # because the logic above is required for a simple copy to be kept across a
449 # rebase.
453 # rebase.
450 if copytracing == b'heuristics':
454 if copytracing == b'heuristics':
451 # Do full copytracing if only non-public revisions are involved as
455 # Do full copytracing if only non-public revisions are involved as
452 # that will be fast enough and will also cover the copies which could
456 # that will be fast enough and will also cover the copies which could
453 # be missed by heuristics
457 # be missed by heuristics
454 if _isfullcopytraceable(repo, c1, base):
458 if _isfullcopytraceable(repo, c1, base):
455 return _fullcopytracing(repo, c1, c2, base)
459 return _fullcopytracing(repo, c1, c2, base)
456 return _heuristicscopytracing(repo, c1, c2, base)
460 return _heuristicscopytracing(repo, c1, c2, base)
457 else:
461 else:
458 return _fullcopytracing(repo, c1, c2, base)
462 return _fullcopytracing(repo, c1, c2, base)
459
463
460
464
461 def _isfullcopytraceable(repo, c1, base):
465 def _isfullcopytraceable(repo, c1, base):
462 """ Checks that if base, source and destination are all no-public branches,
466 """ Checks that if base, source and destination are all no-public branches,
463 if yes let's use the full copytrace algorithm for increased capabilities
467 if yes let's use the full copytrace algorithm for increased capabilities
464 since it will be fast enough.
468 since it will be fast enough.
465
469
466 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
470 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
467 number of changesets from c1 to base such that if number of changesets are
471 number of changesets from c1 to base such that if number of changesets are
468 more than the limit, full copytracing algorithm won't be used.
472 more than the limit, full copytracing algorithm won't be used.
469 """
473 """
470 if c1.rev() is None:
474 if c1.rev() is None:
471 c1 = c1.p1()
475 c1 = c1.p1()
472 if c1.mutable() and base.mutable():
476 if c1.mutable() and base.mutable():
473 sourcecommitlimit = repo.ui.configint(
477 sourcecommitlimit = repo.ui.configint(
474 b'experimental', b'copytrace.sourcecommitlimit'
478 b'experimental', b'copytrace.sourcecommitlimit'
475 )
479 )
476 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
480 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
477 return commits < sourcecommitlimit
481 return commits < sourcecommitlimit
478 return False
482 return False
479
483
480
484
481 def _checksinglesidecopies(
485 def _checksinglesidecopies(
482 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
486 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
483 ):
487 ):
484 if src not in m2:
488 if src not in m2:
485 # deleted on side 2
489 # deleted on side 2
486 if src not in m1:
490 if src not in m1:
487 # renamed on side 1, deleted on side 2
491 # renamed on side 1, deleted on side 2
488 renamedelete[src] = dsts1
492 renamedelete[src] = dsts1
489 elif m2[src] != mb[src]:
493 elif m2[src] != mb[src]:
490 if not _related(c2[src], base[src]):
494 if not _related(c2[src], base[src]):
491 return
495 return
492 # modified on side 2
496 # modified on side 2
493 for dst in dsts1:
497 for dst in dsts1:
494 if dst not in m2:
498 if dst not in m2:
495 # dst not added on side 2 (handle as regular
499 # dst not added on side 2 (handle as regular
496 # "both created" case in manifestmerge otherwise)
500 # "both created" case in manifestmerge otherwise)
497 copy[dst] = src
501 copy[dst] = src
498
502
499
503
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple ``(copy, movewithdir, diverge, renamedelete, dirmove)``
    of dicts (see the final ``return`` below); callers that bail out early
    get empty dicts for the unfilled slots.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # forward copies from the merge base to each side, as {dst: src}
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert to {src: [dst, ...]} so both sides can be compared per source
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # flattened destination sets, only used for the debug output below
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so startswith() below matches whole
    # path components only
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b" pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
664
668
665
669
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same 5-tuple shape as _fullcopytracing(); only the first
    (copies) slot is ever populated here.
    """

    # working contexts have rev() None; fall back to their first parent so
    # the revset queries below get integer revisions
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base (exclusive) and c2
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only the recorded copies whose source also exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index files added on the c1 side by basename and by dirname, the
        # two axes the heuristics match on
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
773
777
774
778
775 def _related(f1, f2):
779 def _related(f1, f2):
776 """return True if f1 and f2 filectx have a common ancestor
780 """return True if f1 and f2 filectx have a common ancestor
777
781
778 Walk back to common ancestor to see if the two files originate
782 Walk back to common ancestor to see if the two files originate
779 from the same file. Since workingfilectx's rev() is None it messes
783 from the same file. Since workingfilectx's rev() is None it messes
780 up the integer comparison logic, hence the pre-step check for
784 up the integer comparison logic, hence the pre-step check for
781 None (f1 and f2 can only be workingfilectx's initially).
785 None (f1 and f2 can only be workingfilectx's initially).
782 """
786 """
783
787
784 if f1 == f2:
788 if f1 == f2:
785 return True # a match
789 return True # a match
786
790
787 g1, g2 = f1.ancestors(), f2.ancestors()
791 g1, g2 = f1.ancestors(), f2.ancestors()
788 try:
792 try:
789 f1r, f2r = f1.linkrev(), f2.linkrev()
793 f1r, f2r = f1.linkrev(), f2.linkrev()
790
794
791 if f1r is None:
795 if f1r is None:
792 f1 = next(g1)
796 f1 = next(g1)
793 if f2r is None:
797 if f2r is None:
794 f2 = next(g2)
798 f2 = next(g2)
795
799
796 while True:
800 while True:
797 f1r, f2r = f1.linkrev(), f2.linkrev()
801 f1r, f2r = f1.linkrev(), f2.linkrev()
798 if f1r > f2r:
802 if f1r > f2r:
799 f1 = next(g1)
803 f1 = next(g1)
800 elif f2r > f1r:
804 elif f2r > f1r:
801 f2 = next(g2)
805 f2 = next(g2)
802 else: # f1 and f2 point to files in the same linkrev
806 else: # f1 and f2 point to files in the same linkrev
803 return f1 == f2 # true if they point to the same file
807 return f1 == f2 # true if they point to the same file
804 except StopIteration:
808 except StopIteration:
805 return False
809 return False
806
810
807
811
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.

    Mutates the working context: matching files get markcopied() called;
    nothing is returned.
    """
    exclude = {}
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    # compute the exclusion set only when copytracing is enabled
    # ('heuristics', 'on', or unset which defaults to on)
    if skiprev is not None and (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    ):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst in exclude:
            continue
        # only mark files that still exist in the working context
        if dst in wctx:
            wctx[dst].markcopied(src)
832
836
833
837
834 def computechangesetfilesadded(ctx):
838 def computechangesetfilesadded(ctx):
835 """return the list of files added in a changeset
839 """return the list of files added in a changeset
836 """
840 """
837 added = []
841 added = []
838 for f in ctx.files():
842 for f in ctx.files():
839 if not any(f in p for p in ctx.parents()):
843 if not any(f in p for p in ctx.parents()):
840 added.append(f)
844 added.append(f)
841 return added
845 return added
842
846
843
847
844 def computechangesetfilesremoved(ctx):
848 def computechangesetfilesremoved(ctx):
845 """return the list of files removed in a changeset
849 """return the list of files removed in a changeset
846 """
850 """
847 removed = []
851 removed = []
848 for f in ctx.files():
852 for f in ctx.files():
849 if f not in ctx:
853 if f not in ctx:
850 removed.append(f)
854 removed.append(f)
851 return removed
855 return removed
852
856
853
857
854 def computechangesetcopies(ctx):
858 def computechangesetcopies(ctx):
855 """return the copies data for a changeset
859 """return the copies data for a changeset
856
860
857 The copies data are returned as a pair of dictionnary (p1copies, p2copies).
861 The copies data are returned as a pair of dictionnary (p1copies, p2copies).
858
862
859 Each dictionnary are in the form: `{newname: oldname}`
863 Each dictionnary are in the form: `{newname: oldname}`
860 """
864 """
861 p1copies = {}
865 p1copies = {}
862 p2copies = {}
866 p2copies = {}
863 p1 = ctx.p1()
867 p1 = ctx.p1()
864 p2 = ctx.p2()
868 p2 = ctx.p2()
865 narrowmatch = ctx._repo.narrowmatch()
869 narrowmatch = ctx._repo.narrowmatch()
866 for dst in ctx.files():
870 for dst in ctx.files():
867 if not narrowmatch(dst) or dst not in ctx:
871 if not narrowmatch(dst) or dst not in ctx:
868 continue
872 continue
869 copied = ctx[dst].renamed()
873 copied = ctx[dst].renamed()
870 if not copied:
874 if not copied:
871 continue
875 continue
872 src, srcnode = copied
876 src, srcnode = copied
873 if src in p1 and p1[src].filenode() == srcnode:
877 if src in p1 and p1[src].filenode() == srcnode:
874 p1copies[dst] = src
878 p1copies[dst] = src
875 elif src in p2 and p2[src].filenode() == srcnode:
879 elif src in p2 and p2[src].filenode() == srcnode:
876 p2copies[dst] = src
880 p2copies[dst] = src
877 return p1copies, p2copies
881 return p1copies, p2copies
878
882
879
883
def encodecopies(files, copies):
    """Serialize ``copies`` ({dst: src}) against the ``files`` list.

    Each copy becomes ``<index-of-dst>\\0<src>`` and entries are joined by
    newlines.  Raises a ProgrammingError when some copy destination is not
    present in ``files`` (every destination must be addressable by index).
    """
    entries = [
        b'%d\0%s' % (idx, copies[dst])
        for idx, dst in enumerate(files)
        if dst in copies
    ]
    if len(entries) != len(copies):
        # at least one copy destination was absent from `files`
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(entries)
890
894
891
895
def decodecopies(files, data):
    """Inverse of encodecopies: parse ``data`` back into a {dst: src} dict.

    Returns None when the payload does not follow the expected
    ``<index>\\0<src>`` per-line syntax or references an out-of-range
    index.
    """
    try:
        copies = {}
        if not data:
            return copies
        for line in data.split(b'\n'):
            strindex, src = line.split(b'\0')
            copies[files[int(strindex)]] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
907
911
908
912
def encodefileindices(files, subset):
    """Encode the members of ``subset`` as newline-separated decimal
    indices into ``files``.

    Output order follows ``files``, not ``subset``.
    """
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % idx for idx, f in enumerate(files) if f in wanted
    )
916
920
917
921
def decodefileindices(files, data):
    """Inverse of encodefileindices: map newline-separated indices in
    ``data`` back to entries of ``files``.

    Returns None for syntactically invalid or out-of-range payloads.
    """
    try:
        if not data:
            return []
        subset = []
        for token in data.split(b'\n'):
            index = int(token)
            # explicit bounds check: negative indices must not wrap around
            if not 0 <= index < len(files):
                return None
            subset.append(files[index])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
933
937
934
938
def _getsidedata(srcrepo, rev):
    """Compute the copy-related sidedata dict for revision ``rev`` of
    ``srcrepo``.

    Returns a dict mapping sidedata keys (SD_P1COPIES, SD_P2COPIES,
    SD_FILESADDED, SD_FILESREMOVED) to their encoded values; categories
    with nothing to record are omitted, so the dict is empty when the
    changeset carries no copy metadata.
    """
    ctx = srcrepo[rev]
    p1copies, p2copies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    # BUGFIX: the previous guard tested the (p1copies, p2copies) tuple
    # itself, which is always truthy, so the encoding work below was never
    # skipped; test the actual collections instead (the resulting sidedata
    # is unchanged either way).
    if any([p1copies, p2copies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
957
961
958
962
def getsidedataadder(srcrepo, destrepo):
    """Return a sidedata companion callable that recomputes copy-related
    sidedata from ``srcrepo`` for every changelog revision.

    The companion returns ``(False, (), sidedata)``: it never asks for a
    full drop nor for key removal, it only supplies new sidedata.
    """

    def sidedatacompanion(revlog, rev):
        # only the changelog gets sidedata; the presence of `filteredrevs`
        # is used as a cheap way to recognize it among revlogs
        if util.safehasattr(revlog, 'filteredrevs'):
            return False, (), _getsidedata(srcrepo, rev)
        return False, (), {}

    return sidedatacompanion
967
971
968
972
def getsidedataremover(srcrepo, destrepo):
    """Return a sidedata companion callable that strips the copy-related
    sidedata categories from every changelog revision carrying them.

    The companion returns ``(False, removedkeys, {})``: it never drops a
    revision wholesale and never adds new sidedata.
    """

    def sidedatacompanion(revlog, rev):
        removed = ()
        # only the changelog carries this sidedata; `filteredrevs` is used
        # as a cheap way to recognize it among revlogs
        if util.safehasattr(revlog, 'filteredrevs'):
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                removed = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, removed, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now