##// END OF EJS Templates
sidedatacopies: directly fetch copies information from sidedata...
marmoute -
r43551:675c776f default
parent child Browse files
Show More
@@ -1,955 +1,968 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16
16
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
18
18
19 from . import (
19 from . import (
20 error,
20 error,
21 match as matchmod,
21 match as matchmod,
22 node,
22 node,
23 pathutil,
23 pathutil,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28 from .revlogutils import sidedata as sidedatamod
28 from .revlogutils import sidedata as sidedatamod
29
29
30 from .utils import stringutil
30 from .utils import stringutil
31
31
32
32
33 def _filter(src, dst, t):
33 def _filter(src, dst, t):
34 """filters out invalid copies after chaining"""
34 """filters out invalid copies after chaining"""
35
35
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 # in the following table (not including trivial cases). For example, case 2
38 # in the following table (not including trivial cases). For example, case 2
39 # is where a file existed in 'src' and remained under that name in 'mid' and
39 # is where a file existed in 'src' and remained under that name in 'mid' and
40 # then was renamed between 'mid' and 'dst'.
40 # then was renamed between 'mid' and 'dst'.
41 #
41 #
42 # case src mid dst result
42 # case src mid dst result
43 # 1 x y - -
43 # 1 x y - -
44 # 2 x y y x->y
44 # 2 x y y x->y
45 # 3 x y x -
45 # 3 x y x -
46 # 4 x y z x->z
46 # 4 x y z x->z
47 # 5 - x y -
47 # 5 - x y -
48 # 6 x x y x->y
48 # 6 x x y x->y
49 #
49 #
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 # between 5 and 6, so it includes all cases in its result.
52 # between 5 and 6, so it includes all cases in its result.
53 # Cases 1, 3, and 5 are then removed by _filter().
53 # Cases 1, 3, and 5 are then removed by _filter().
54
54
55 for k, v in list(t.items()):
55 for k, v in list(t.items()):
56 # remove copies from files that didn't exist
56 # remove copies from files that didn't exist
57 if v not in src:
57 if v not in src:
58 del t[k]
58 del t[k]
59 # remove criss-crossed copies
59 # remove criss-crossed copies
60 elif k in src and v in dst:
60 elif k in src and v in dst:
61 del t[k]
61 del t[k]
62 # remove copies to files that were then removed
62 # remove copies to files that were then removed
63 elif k not in dst:
63 elif k not in dst:
64 del t[k]
64 del t[k]
65
65
66
66
67 def _chain(a, b):
67 def _chain(a, b):
68 """chain two sets of copies 'a' and 'b'"""
68 """chain two sets of copies 'a' and 'b'"""
69 t = a.copy()
69 t = a.copy()
70 for k, v in pycompat.iteritems(b):
70 for k, v in pycompat.iteritems(b):
71 if v in t:
71 if v in t:
72 t[k] = t[v]
72 t[k] = t[v]
73 else:
73 else:
74 t[k] = v
74 t[k] = v
75 return t
75 return t
76
76
77
77
78 def _tracefile(fctx, am, basemf):
78 def _tracefile(fctx, am, basemf):
79 """return file context that is the ancestor of fctx present in ancestor
79 """return file context that is the ancestor of fctx present in ancestor
80 manifest am
80 manifest am
81
81
82 Note: we used to try and stop after a given limit, however checking if that
82 Note: we used to try and stop after a given limit, however checking if that
83 limit is reached turned out to be very expensive. we are better off
83 limit is reached turned out to be very expensive. we are better off
84 disabling that feature."""
84 disabling that feature."""
85
85
86 for f in fctx.ancestors():
86 for f in fctx.ancestors():
87 path = f.path()
87 path = f.path()
88 if am.get(path, None) == f.filenode():
88 if am.get(path, None) == f.filenode():
89 return path
89 return path
90 if basemf and basemf.get(path, None) == f.filenode():
90 if basemf and basemf.get(path, None) == f.filenode():
91 return path
91 return path
92
92
93
93
94 def _dirstatecopies(repo, match=None):
94 def _dirstatecopies(repo, match=None):
95 ds = repo.dirstate
95 ds = repo.dirstate
96 c = ds.copies().copy()
96 c = ds.copies().copy()
97 for k in list(c):
97 for k in list(c):
98 if ds[k] not in b'anm' or (match and not match(k)):
98 if ds[k] not in b'anm' or (match and not match(k)):
99 del c[k]
99 del c[k]
100 return c
100 return c
101
101
102
102
103 def _computeforwardmissing(a, b, match=None):
103 def _computeforwardmissing(a, b, match=None):
104 """Computes which files are in b but not a.
104 """Computes which files are in b but not a.
105 This is its own function so extensions can easily wrap this call to see what
105 This is its own function so extensions can easily wrap this call to see what
106 files _forwardcopies is about to process.
106 files _forwardcopies is about to process.
107 """
107 """
108 ma = a.manifest()
108 ma = a.manifest()
109 mb = b.manifest()
109 mb = b.manifest()
110 return mb.filesnotin(ma, match=match)
110 return mb.filesnotin(ma, match=match)
111
111
112
112
113 def usechangesetcentricalgo(repo):
113 def usechangesetcentricalgo(repo):
114 """Checks if we should use changeset-centric copy algorithms"""
114 """Checks if we should use changeset-centric copy algorithms"""
115 if repo.filecopiesmode == b'changeset-sidedata':
115 if repo.filecopiesmode == b'changeset-sidedata':
116 return True
116 return True
117 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
117 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
118 changesetsource = (b'changeset-only', b'compatibility')
118 changesetsource = (b'changeset-only', b'compatibility')
119 return readfrom in changesetsource
119 return readfrom in changesetsource
120
120
121
121
122 def _committedforwardcopies(a, b, base, match):
122 def _committedforwardcopies(a, b, base, match):
123 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
123 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
124 # files might have to be traced back to the fctx parent of the last
124 # files might have to be traced back to the fctx parent of the last
125 # one-side-only changeset, but not further back than that
125 # one-side-only changeset, but not further back than that
126 repo = a._repo
126 repo = a._repo
127
127
128 if usechangesetcentricalgo(repo):
128 if usechangesetcentricalgo(repo):
129 return _changesetforwardcopies(a, b, match)
129 return _changesetforwardcopies(a, b, match)
130
130
131 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
131 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
132 dbg = repo.ui.debug
132 dbg = repo.ui.debug
133 if debug:
133 if debug:
134 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
134 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
135 am = a.manifest()
135 am = a.manifest()
136 basemf = None if base is None else base.manifest()
136 basemf = None if base is None else base.manifest()
137
137
138 # find where new files came from
138 # find where new files came from
139 # we currently don't try to find where old files went, too expensive
139 # we currently don't try to find where old files went, too expensive
140 # this means we can miss a case like 'hg rm b; hg cp a b'
140 # this means we can miss a case like 'hg rm b; hg cp a b'
141 cm = {}
141 cm = {}
142
142
143 # Computing the forward missing is quite expensive on large manifests, since
143 # Computing the forward missing is quite expensive on large manifests, since
144 # it compares the entire manifests. We can optimize it in the common use
144 # it compares the entire manifests. We can optimize it in the common use
145 # case of computing what copies are in a commit versus its parent (like
145 # case of computing what copies are in a commit versus its parent (like
146 # during a rebase or histedit). Note, we exclude merge commits from this
146 # during a rebase or histedit). Note, we exclude merge commits from this
147 # optimization, since the ctx.files() for a merge commit is not correct for
147 # optimization, since the ctx.files() for a merge commit is not correct for
148 # this comparison.
148 # this comparison.
149 forwardmissingmatch = match
149 forwardmissingmatch = match
150 if b.p1() == a and b.p2().node() == node.nullid:
150 if b.p1() == a and b.p2().node() == node.nullid:
151 filesmatcher = matchmod.exact(b.files())
151 filesmatcher = matchmod.exact(b.files())
152 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
152 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
153 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
153 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
154
154
155 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
155 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
156
156
157 if debug:
157 if debug:
158 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
158 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
159
159
160 for f in sorted(missing):
160 for f in sorted(missing):
161 if debug:
161 if debug:
162 dbg(b'debug.copies: tracing file: %s\n' % f)
162 dbg(b'debug.copies: tracing file: %s\n' % f)
163 fctx = b[f]
163 fctx = b[f]
164 fctx._ancestrycontext = ancestrycontext
164 fctx._ancestrycontext = ancestrycontext
165
165
166 if debug:
166 if debug:
167 start = util.timer()
167 start = util.timer()
168 opath = _tracefile(fctx, am, basemf)
168 opath = _tracefile(fctx, am, basemf)
169 if opath:
169 if opath:
170 if debug:
170 if debug:
171 dbg(b'debug.copies: rename of: %s\n' % opath)
171 dbg(b'debug.copies: rename of: %s\n' % opath)
172 cm[f] = opath
172 cm[f] = opath
173 if debug:
173 if debug:
174 dbg(
174 dbg(
175 b'debug.copies: time: %f seconds\n'
175 b'debug.copies: time: %f seconds\n'
176 % (util.timer() - start)
176 % (util.timer() - start)
177 )
177 )
178 return cm
178 return cm
179
179
180
180
181 def _revinfogetter(repo):
181 def _revinfogetter(repo):
182 """return a function that returns multiple data given a <rev>
182 """return a function that returns multiple data given a <rev>
183
183
184 * p1: revision number of first parent
184 * p1: revision number of first parent
185 * p2: revision number of second parent
185 * p2: revision number of second parent
186 * p1copies: mapping of copies from p1
186 * p1copies: mapping of copies from p1
187 * p2copies: mapping of copies from p2
187 * p2copies: mapping of copies from p2
188 * removed: a list of removed files
188 * removed: a list of removed files
189 """
189 """
190 cl = repo.changelog
190 cl = repo.changelog
191 parents = cl.parentrevs
191 parents = cl.parentrevs
192
192
193 def revinfo(rev):
193 if repo.filecopiesmode == b'changeset-sidedata':
194 p1, p2 = parents(rev)
194 changelogrevision = cl.changelogrevision
195 ctx = repo[rev]
195
196 p1copies, p2copies = ctx._copies
196 def revinfo(rev):
197 removed = ctx.filesremoved()
197 p1, p2 = parents(rev)
198 return p1, p2, p1copies, p2copies, removed
198 c = changelogrevision(rev)
199 p1copies = c.p1copies
200 p2copies = c.p2copies
201 removed = c.filesremoved
202 return p1, p2, p1copies, p2copies, removed
203
204 else:
205
206 def revinfo(rev):
207 p1, p2 = parents(rev)
208 ctx = repo[rev]
209 p1copies, p2copies = ctx._copies
210 removed = ctx.filesremoved()
211 return p1, p2, p1copies, p2copies, removed
199
212
200 return revinfo
213 return revinfo
201
214
202
215
203 def _changesetforwardcopies(a, b, match):
216 def _changesetforwardcopies(a, b, match):
204 if a.rev() in (node.nullrev, b.rev()):
217 if a.rev() in (node.nullrev, b.rev()):
205 return {}
218 return {}
206
219
207 repo = a.repo().unfiltered()
220 repo = a.repo().unfiltered()
208 children = {}
221 children = {}
209 revinfo = _revinfogetter(repo)
222 revinfo = _revinfogetter(repo)
210
223
211 cl = repo.changelog
224 cl = repo.changelog
212 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
225 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
213 for r in missingrevs:
226 for r in missingrevs:
214 for p in cl.parentrevs(r):
227 for p in cl.parentrevs(r):
215 if p == node.nullrev:
228 if p == node.nullrev:
216 continue
229 continue
217 if p not in children:
230 if p not in children:
218 children[p] = [r]
231 children[p] = [r]
219 else:
232 else:
220 children[p].append(r)
233 children[p].append(r)
221
234
222 roots = set(children) - set(missingrevs)
235 roots = set(children) - set(missingrevs)
223 work = list(roots)
236 work = list(roots)
224 all_copies = {r: {} for r in roots}
237 all_copies = {r: {} for r in roots}
225 heapq.heapify(work)
238 heapq.heapify(work)
226 alwaysmatch = match.always()
239 alwaysmatch = match.always()
227 while work:
240 while work:
228 r = heapq.heappop(work)
241 r = heapq.heappop(work)
229 copies = all_copies.pop(r)
242 copies = all_copies.pop(r)
230 if r == b.rev():
243 if r == b.rev():
231 return copies
244 return copies
232 for i, c in enumerate(children[r]):
245 for i, c in enumerate(children[r]):
233 p1, p2, p1copies, p2copies, removed = revinfo(c)
246 p1, p2, p1copies, p2copies, removed = revinfo(c)
234 if r == p1:
247 if r == p1:
235 parent = 1
248 parent = 1
236 childcopies = p1copies
249 childcopies = p1copies
237 else:
250 else:
238 assert r == p2
251 assert r == p2
239 parent = 2
252 parent = 2
240 childcopies = p2copies
253 childcopies = p2copies
241 if not alwaysmatch:
254 if not alwaysmatch:
242 childcopies = {
255 childcopies = {
243 dst: src for dst, src in childcopies.items() if match(dst)
256 dst: src for dst, src in childcopies.items() if match(dst)
244 }
257 }
245 # Copy the dict only if later iterations will also need it
258 # Copy the dict only if later iterations will also need it
246 if i != len(children[r]) - 1:
259 if i != len(children[r]) - 1:
247 newcopies = copies.copy()
260 newcopies = copies.copy()
248 else:
261 else:
249 newcopies = copies
262 newcopies = copies
250 if childcopies:
263 if childcopies:
251 newcopies = _chain(newcopies, childcopies)
264 newcopies = _chain(newcopies, childcopies)
252 for f in removed:
265 for f in removed:
253 if f in newcopies:
266 if f in newcopies:
254 del newcopies[f]
267 del newcopies[f]
255 othercopies = all_copies.get(c)
268 othercopies = all_copies.get(c)
256 if othercopies is None:
269 if othercopies is None:
257 heapq.heappush(work, c)
270 heapq.heappush(work, c)
258 all_copies[c] = newcopies
271 all_copies[c] = newcopies
259 else:
272 else:
260 # we are the second parent to work on c, we need to merge our
273 # we are the second parent to work on c, we need to merge our
261 # work with the other.
274 # work with the other.
262 #
275 #
263 # Unlike when copies are stored in the filelog, we consider
276 # Unlike when copies are stored in the filelog, we consider
264 # it a copy even if the destination already existed on the
277 # it a copy even if the destination already existed on the
265 # other branch. It's simply too expensive to check if the
278 # other branch. It's simply too expensive to check if the
266 # file existed in the manifest.
279 # file existed in the manifest.
267 #
280 #
268 # In case of conflict, parent 1 takes precedence over parent 2.
281 # In case of conflict, parent 1 takes precedence over parent 2.
269 # This is an arbitrary choice made anew when implementing
282 # This is an arbitrary choice made anew when implementing
270 # changeset based copies. It was made without regard to
283 # changeset based copies. It was made without regard to
271 # potential filelog related behavior.
284 # potential filelog related behavior.
272 if parent == 1:
285 if parent == 1:
273 othercopies.update(newcopies)
286 othercopies.update(newcopies)
274 else:
287 else:
275 newcopies.update(othercopies)
288 newcopies.update(othercopies)
276 all_copies[c] = newcopies
289 all_copies[c] = newcopies
277 assert False
290 assert False
278
291
279
292
280 def _forwardcopies(a, b, base=None, match=None):
293 def _forwardcopies(a, b, base=None, match=None):
281 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
294 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
282
295
283 if base is None:
296 if base is None:
284 base = a
297 base = a
285 match = a.repo().narrowmatch(match)
298 match = a.repo().narrowmatch(match)
286 # check for working copy
299 # check for working copy
287 if b.rev() is None:
300 if b.rev() is None:
288 cm = _committedforwardcopies(a, b.p1(), base, match)
301 cm = _committedforwardcopies(a, b.p1(), base, match)
289 # combine copies from dirstate if necessary
302 # combine copies from dirstate if necessary
290 copies = _chain(cm, _dirstatecopies(b._repo, match))
303 copies = _chain(cm, _dirstatecopies(b._repo, match))
291 else:
304 else:
292 copies = _committedforwardcopies(a, b, base, match)
305 copies = _committedforwardcopies(a, b, base, match)
293 return copies
306 return copies
294
307
295
308
296 def _backwardrenames(a, b, match):
309 def _backwardrenames(a, b, match):
297 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
310 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
298 return {}
311 return {}
299
312
300 # Even though we're not taking copies into account, 1:n rename situations
313 # Even though we're not taking copies into account, 1:n rename situations
301 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
314 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
302 # arbitrarily pick one of the renames.
315 # arbitrarily pick one of the renames.
303 # We don't want to pass in "match" here, since that would filter
316 # We don't want to pass in "match" here, since that would filter
304 # the destination by it. Since we're reversing the copies, we want
317 # the destination by it. Since we're reversing the copies, we want
305 # to filter the source instead.
318 # to filter the source instead.
306 f = _forwardcopies(b, a)
319 f = _forwardcopies(b, a)
307 r = {}
320 r = {}
308 for k, v in sorted(pycompat.iteritems(f)):
321 for k, v in sorted(pycompat.iteritems(f)):
309 if match and not match(v):
322 if match and not match(v):
310 continue
323 continue
311 # remove copies
324 # remove copies
312 if v in a:
325 if v in a:
313 continue
326 continue
314 r[v] = k
327 r[v] = k
315 return r
328 return r
316
329
317
330
318 def pathcopies(x, y, match=None):
331 def pathcopies(x, y, match=None):
319 """find {dst@y: src@x} copy mapping for directed compare"""
332 """find {dst@y: src@x} copy mapping for directed compare"""
320 repo = x._repo
333 repo = x._repo
321 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
334 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
322 if debug:
335 if debug:
323 repo.ui.debug(
336 repo.ui.debug(
324 b'debug.copies: searching copies from %s to %s\n' % (x, y)
337 b'debug.copies: searching copies from %s to %s\n' % (x, y)
325 )
338 )
326 if x == y or not x or not y:
339 if x == y or not x or not y:
327 return {}
340 return {}
328 a = y.ancestor(x)
341 a = y.ancestor(x)
329 if a == x:
342 if a == x:
330 if debug:
343 if debug:
331 repo.ui.debug(b'debug.copies: search mode: forward\n')
344 repo.ui.debug(b'debug.copies: search mode: forward\n')
332 if y.rev() is None and x == y.p1():
345 if y.rev() is None and x == y.p1():
333 # short-circuit to avoid issues with merge states
346 # short-circuit to avoid issues with merge states
334 return _dirstatecopies(repo, match)
347 return _dirstatecopies(repo, match)
335 copies = _forwardcopies(x, y, match=match)
348 copies = _forwardcopies(x, y, match=match)
336 elif a == y:
349 elif a == y:
337 if debug:
350 if debug:
338 repo.ui.debug(b'debug.copies: search mode: backward\n')
351 repo.ui.debug(b'debug.copies: search mode: backward\n')
339 copies = _backwardrenames(x, y, match=match)
352 copies = _backwardrenames(x, y, match=match)
340 else:
353 else:
341 if debug:
354 if debug:
342 repo.ui.debug(b'debug.copies: search mode: combined\n')
355 repo.ui.debug(b'debug.copies: search mode: combined\n')
343 base = None
356 base = None
344 if a.rev() != node.nullrev:
357 if a.rev() != node.nullrev:
345 base = x
358 base = x
346 copies = _chain(
359 copies = _chain(
347 _backwardrenames(x, a, match=match),
360 _backwardrenames(x, a, match=match),
348 _forwardcopies(a, y, base, match=match),
361 _forwardcopies(a, y, base, match=match),
349 )
362 )
350 _filter(x, y, copies)
363 _filter(x, y, copies)
351 return copies
364 return copies
352
365
353
366
354 def mergecopies(repo, c1, c2, base):
367 def mergecopies(repo, c1, c2, base):
355 """
368 """
356 Finds moves and copies between context c1 and c2 that are relevant for
369 Finds moves and copies between context c1 and c2 that are relevant for
357 merging. 'base' will be used as the merge base.
370 merging. 'base' will be used as the merge base.
358
371
359 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
372 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
360 files that were moved/ copied in one merge parent and modified in another.
373 files that were moved/ copied in one merge parent and modified in another.
361 For example:
374 For example:
362
375
363 o ---> 4 another commit
376 o ---> 4 another commit
364 |
377 |
365 | o ---> 3 commit that modifies a.txt
378 | o ---> 3 commit that modifies a.txt
366 | /
379 | /
367 o / ---> 2 commit that moves a.txt to b.txt
380 o / ---> 2 commit that moves a.txt to b.txt
368 |/
381 |/
369 o ---> 1 merge base
382 o ---> 1 merge base
370
383
371 If we try to rebase revision 3 on revision 4, since there is no a.txt in
384 If we try to rebase revision 3 on revision 4, since there is no a.txt in
372 revision 4, and if the user has copytrace disabled, we print the following
385 revision 4, and if the user has copytrace disabled, we print the following
373 message:
386 message:
374
387
375 ```other changed <file> which local deleted```
388 ```other changed <file> which local deleted```
376
389
377 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
390 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
378 "dirmove".
391 "dirmove".
379
392
380 "copy" is a mapping from destination name -> source name,
393 "copy" is a mapping from destination name -> source name,
381 where source is in c1 and destination is in c2 or vice-versa.
394 where source is in c1 and destination is in c2 or vice-versa.
382
395
383 "movewithdir" is a mapping from source name -> destination name,
396 "movewithdir" is a mapping from source name -> destination name,
384 where the file at source is present in one context but not the other
397 where the file at source is present in one context but not the other
385 needs to be moved to destination by the merge process, because the
398 needs to be moved to destination by the merge process, because the
386 other context moved the directory it is in.
399 other context moved the directory it is in.
387
400
388 "diverge" is a mapping of source name -> list of destination names
401 "diverge" is a mapping of source name -> list of destination names
389 for divergent renames.
402 for divergent renames.
390
403
391 "renamedelete" is a mapping of source name -> list of destination
404 "renamedelete" is a mapping of source name -> list of destination
392 names for files deleted in c1 that were renamed in c2 or vice-versa.
405 names for files deleted in c1 that were renamed in c2 or vice-versa.
393
406
394 "dirmove" is a mapping of detected source dir -> destination dir renames.
407 "dirmove" is a mapping of detected source dir -> destination dir renames.
395 This is needed for handling changes to new files previously grafted into
408 This is needed for handling changes to new files previously grafted into
396 renamed directories.
409 renamed directories.
397
410
398 This function calls different copytracing algorithms based on config.
411 This function calls different copytracing algorithms based on config.
399 """
412 """
400 # avoid silly behavior for update from empty dir
413 # avoid silly behavior for update from empty dir
401 if not c1 or not c2 or c1 == c2:
414 if not c1 or not c2 or c1 == c2:
402 return {}, {}, {}, {}, {}
415 return {}, {}, {}, {}, {}
403
416
404 narrowmatch = c1.repo().narrowmatch()
417 narrowmatch = c1.repo().narrowmatch()
405
418
406 # avoid silly behavior for parent -> working dir
419 # avoid silly behavior for parent -> working dir
407 if c2.node() is None and c1.node() == repo.dirstate.p1():
420 if c2.node() is None and c1.node() == repo.dirstate.p1():
408 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
421 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
409
422
410 copytracing = repo.ui.config(b'experimental', b'copytrace')
423 copytracing = repo.ui.config(b'experimental', b'copytrace')
411 if stringutil.parsebool(copytracing) is False:
424 if stringutil.parsebool(copytracing) is False:
412 # stringutil.parsebool() returns None when it is unable to parse the
425 # stringutil.parsebool() returns None when it is unable to parse the
413 # value, so we should rely on making sure copytracing is on such cases
426 # value, so we should rely on making sure copytracing is on such cases
414 return {}, {}, {}, {}, {}
427 return {}, {}, {}, {}, {}
415
428
416 if usechangesetcentricalgo(repo):
429 if usechangesetcentricalgo(repo):
417 # The heuristics don't make sense when we need changeset-centric algos
430 # The heuristics don't make sense when we need changeset-centric algos
418 return _fullcopytracing(repo, c1, c2, base)
431 return _fullcopytracing(repo, c1, c2, base)
419
432
420 # Copy trace disabling is explicitly below the node == p1 logic above
433 # Copy trace disabling is explicitly below the node == p1 logic above
421 # because the logic above is required for a simple copy to be kept across a
434 # because the logic above is required for a simple copy to be kept across a
422 # rebase.
435 # rebase.
423 if copytracing == b'heuristics':
436 if copytracing == b'heuristics':
424 # Do full copytracing if only non-public revisions are involved as
437 # Do full copytracing if only non-public revisions are involved as
425 # that will be fast enough and will also cover the copies which could
438 # that will be fast enough and will also cover the copies which could
426 # be missed by heuristics
439 # be missed by heuristics
427 if _isfullcopytraceable(repo, c1, base):
440 if _isfullcopytraceable(repo, c1, base):
428 return _fullcopytracing(repo, c1, c2, base)
441 return _fullcopytracing(repo, c1, c2, base)
429 return _heuristicscopytracing(repo, c1, c2, base)
442 return _heuristicscopytracing(repo, c1, c2, base)
430 else:
443 else:
431 return _fullcopytracing(repo, c1, c2, base)
444 return _fullcopytracing(repo, c1, c2, base)
432
445
433
446
434 def _isfullcopytraceable(repo, c1, base):
447 def _isfullcopytraceable(repo, c1, base):
435 """ Checks that if base, source and destination are all non-public branches,
448 """ Checks that if base, source and destination are all non-public branches,
436 if yes let's use the full copytrace algorithm for increased capabilities
449 if yes let's use the full copytrace algorithm for increased capabilities
437 since it will be fast enough.
450 since it will be fast enough.
438
451
439 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
452 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
440 number of changesets from c1 to base such that if the number of changesets is
453 number of changesets from c1 to base such that if the number of changesets is
441 more than the limit, full copytracing algorithm won't be used.
454 more than the limit, full copytracing algorithm won't be used.
442 """
455 """
443 if c1.rev() is None:
456 if c1.rev() is None:
444 c1 = c1.p1()
457 c1 = c1.p1()
445 if c1.mutable() and base.mutable():
458 if c1.mutable() and base.mutable():
446 sourcecommitlimit = repo.ui.configint(
459 sourcecommitlimit = repo.ui.configint(
447 b'experimental', b'copytrace.sourcecommitlimit'
460 b'experimental', b'copytrace.sourcecommitlimit'
448 )
461 )
449 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
462 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
450 return commits < sourcecommitlimit
463 return commits < sourcecommitlimit
451 return False
464 return False
452
465
453
466
454 def _checksinglesidecopies(
467 def _checksinglesidecopies(
455 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
468 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
456 ):
469 ):
457 if src not in m2:
470 if src not in m2:
458 # deleted on side 2
471 # deleted on side 2
459 if src not in m1:
472 if src not in m1:
460 # renamed on side 1, deleted on side 2
473 # renamed on side 1, deleted on side 2
461 renamedelete[src] = dsts1
474 renamedelete[src] = dsts1
462 elif m2[src] != mb[src]:
475 elif m2[src] != mb[src]:
463 if not _related(c2[src], base[src]):
476 if not _related(c2[src], base[src]):
464 return
477 return
465 # modified on side 2
478 # modified on side 2
466 for dst in dsts1:
479 for dst in dsts1:
467 if dst not in m2:
480 if dst not in m2:
468 # dst not added on side 2 (handle as regular
481 # dst not added on side 2 (handle as regular
469 # "both created" case in manifestmerge otherwise)
482 # "both created" case in manifestmerge otherwise)
470 copy[dst] = src
483 copy[dst] = src
471
484
472
485
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple of dicts:

    - copy: {dst: src} copies to merge (dst in c1/c2, src in base)
    - movewithdir: {file: newpath} files to move because their directory moved
    - diverge: {src: [dsts]} sources renamed to different names on each side
    - renamedelete: {src: [dsts]} renamed on one side, deleted on the other
    - dirmove: {srcdir + "/": dstdir + "/"} detected directory renames
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert each side's {dst: src} mapping into {src: [dsts]} so a source
    # copied to several destinations can be classified in one pass
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # flattened destination sets, used only for the debug annotations below
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        # no copies at all: nothing further to detect
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b"   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so prefix matching below is unambiguous
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
637
650
638
651
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same 5-tuple shape as _fullcopytracing(); only the first
    (copies) dict is ever populated by the heuristic path.
    """

    # a rev of None indicates a working context; substitute its first parent
    # so revision-number based queries below work
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index the files added on side 1 by basename and by directory so
        # rename candidates can be looked up cheaply per missing file
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
746
759
747
760
748 def _related(f1, f2):
761 def _related(f1, f2):
749 """return True if f1 and f2 filectx have a common ancestor
762 """return True if f1 and f2 filectx have a common ancestor
750
763
751 Walk back to common ancestor to see if the two files originate
764 Walk back to common ancestor to see if the two files originate
752 from the same file. Since workingfilectx's rev() is None it messes
765 from the same file. Since workingfilectx's rev() is None it messes
753 up the integer comparison logic, hence the pre-step check for
766 up the integer comparison logic, hence the pre-step check for
754 None (f1 and f2 can only be workingfilectx's initially).
767 None (f1 and f2 can only be workingfilectx's initially).
755 """
768 """
756
769
757 if f1 == f2:
770 if f1 == f2:
758 return True # a match
771 return True # a match
759
772
760 g1, g2 = f1.ancestors(), f2.ancestors()
773 g1, g2 = f1.ancestors(), f2.ancestors()
761 try:
774 try:
762 f1r, f2r = f1.linkrev(), f2.linkrev()
775 f1r, f2r = f1.linkrev(), f2.linkrev()
763
776
764 if f1r is None:
777 if f1r is None:
765 f1 = next(g1)
778 f1 = next(g1)
766 if f2r is None:
779 if f2r is None:
767 f2 = next(g2)
780 f2 = next(g2)
768
781
769 while True:
782 while True:
770 f1r, f2r = f1.linkrev(), f2.linkrev()
783 f1r, f2r = f1.linkrev(), f2.linkrev()
771 if f1r > f2r:
784 if f1r > f2r:
772 f1 = next(g1)
785 f1 = next(g1)
773 elif f2r > f1r:
786 elif f2r > f1r:
774 f2 = next(g2)
787 f2 = next(g2)
775 else: # f1 and f2 point to files in the same linkrev
788 else: # f1 and f2 point to files in the same linkrev
776 return f1 == f2 # true if they point to the same file
789 return f1 == f2 # true if they point to the same file
777 except StopIteration:
790 except StopIteration:
778 return False
791 return False
779
792
780
793
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    # only build the exclusion set when copytracing is effectively enabled:
    # 'heuristics', an explicit true value, or unset (None, the default)
    if skiprev is not None and (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    ):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst in exclude:
            # copy already present between fromrev and skiprev: do not repeat
            continue
        if dst in wctx:
            # record the copy in the working context's dirstate
            wctx[dst].markcopied(src)
805
818
806
819
def computechangesetfilesadded(ctx):
    """Return the list of files added in a changeset.

    A file counts as added when no parent of ``ctx`` contains it.
    """
    parents = ctx.parents()
    return [f for f in ctx.files() if not any(f in p for p in parents)]
815
828
816
829
def computechangesetfilesremoved(ctx):
    """Return the list of files removed in a changeset.

    A file counts as removed when it is listed in ``ctx.files()`` but is
    no longer present in ``ctx`` itself.
    """
    return [f for f in ctx.files() if f not in ctx]
825
838
826
839
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionnary (p1copies, p2copies).

    Each dictionnary are in the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # skip files outside the narrowspec and files not present in ctx
        # (e.g. removed files carry no copy information)
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        # attribute the copy to whichever parent actually contains the
        # source file at the recorded node
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies
851
864
852
865
def encodecopies(files, copies):
    """Encode a ``{dest: source}`` copy mapping against a file list.

    Each entry becomes ``<index>\\0<source>`` where ``<index>`` is the
    position of the destination in ``files``; entries are joined with
    newlines. Raises error.ProgrammingError when a copy destination does
    not appear in ``files``.
    """
    entries = [
        b'%d\0%s' % (idx, copies[dst])
        for idx, dst in enumerate(files)
        if dst in copies
    ]
    if len(entries) != len(copies):
        # at least one copy target was not found in the file list
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(entries)
863
876
864
877
def decodecopies(files, data):
    """Decode ``encodecopies()`` output back into a ``{dest: source}`` dict.

    Returns None when the payload cannot be parsed.
    """
    try:
        copies = {}
        if data:
            for line in data.split(b'\n'):
                strindex, src = line.split(b'\0')
                copies[files[int(strindex)]] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
880
893
881
894
def encodefileindices(files, subset):
    """Encode ``subset`` as newline-separated decimal indices into ``files``.

    Files in ``subset`` that do not occur in ``files`` are silently ignored.
    """
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % idx for idx, f in enumerate(files) if f in wanted
    )
889
902
890
903
def decodefileindices(files, data):
    """Decode ``encodefileindices()`` output back into a list of files.

    Returns None for out-of-range indices or an unparsable payload.
    """
    subset = []
    if not data:
        return subset
    try:
        for token in data.split(b'\n'):
            idx = int(token)
            if not (0 <= idx < len(files)):
                # reject indices outside the file list
                return None
            subset.append(files[idx])
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
    return subset
906
919
907
920
def _getsidedata(srcrepo, rev):
    """Compute the changelog sidedata for ``rev`` from its file metadata.

    Gathers the copies, added files and removed files of the changeset and
    encodes them under the dedicated sidedata keys. Returns a (possibly
    empty) dict mapping sidedata keys to encoded byte payloads.
    """
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    if any([filescopies, filesadded, filesremoved]):
        # all encodings are index-based against the sorted file list
        sortedfiles = sorted(ctx.files())
        p1copies, p2copies = filescopies
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        # only store non-empty payloads
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
930
943
931
944
def getsidedataadder(srcrepo, destrepo):
    """Build a sidedata companion that adds copy metadata to the changelog.

    The returned callable yields a ``(False, (), sidedata)`` triple where
    ``sidedata`` holds the encoded copy information for changelog revisions
    and stays empty for filelogs and manifests.
    """

    def sidedatacompanion(revlog, rev):
        newsidedata = {}
        # only the changelog (recognized by its filteredrevs attribute)
        # carries copy-tracing sidedata
        if util.safehasattr(revlog, 'filteredrevs'):
            newsidedata = _getsidedata(srcrepo, rev)
        return False, (), newsidedata

    return sidedatacompanion
940
953
941
954
def getsidedataremover(srcrepo, destrepo):
    """Build a sidedata companion that strips copy metadata sidedata.

    The returned callable yields a ``(False, keys, {})`` triple where
    ``keys`` lists the copy-tracing sidedata keys to drop for changelog
    revisions that actually carry sidedata, and is empty otherwise.
    """

    def sidedatacompanion(revlog, rev):
        keys = ()
        # only the changelog (recognized by its filteredrevs attribute)
        # stores copy-tracing sidedata
        if util.safehasattr(revlog, 'filteredrevs'):
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                keys = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, keys, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now