##// END OF EJS Templates
copies: use an unfiltered repository for the changeset centric algorithm...
marmoute -
r43550:00de32aa default
parent child Browse files
Show More
@@ -1,955 +1,955 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16
16
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
18
18
19 from . import (
19 from . import (
20 error,
20 error,
21 match as matchmod,
21 match as matchmod,
22 node,
22 node,
23 pathutil,
23 pathutil,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28 from .revlogutils import sidedata as sidedatamod
28 from .revlogutils import sidedata as sidedatamod
29
29
30 from .utils import stringutil
30 from .utils import stringutil
31
31
32
32
33 def _filter(src, dst, t):
33 def _filter(src, dst, t):
34 """filters out invalid copies after chaining"""
34 """filters out invalid copies after chaining"""
35
35
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 # in the following table (not including trivial cases). For example, case 2
38 # in the following table (not including trivial cases). For example, case 2
39 # is where a file existed in 'src' and remained under that name in 'mid' and
39 # is where a file existed in 'src' and remained under that name in 'mid' and
40 # then was renamed between 'mid' and 'dst'.
40 # then was renamed between 'mid' and 'dst'.
41 #
41 #
42 # case src mid dst result
42 # case src mid dst result
43 # 1 x y - -
43 # 1 x y - -
44 # 2 x y y x->y
44 # 2 x y y x->y
45 # 3 x y x -
45 # 3 x y x -
46 # 4 x y z x->z
46 # 4 x y z x->z
47 # 5 - x y -
47 # 5 - x y -
48 # 6 x x y x->y
48 # 6 x x y x->y
49 #
49 #
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 # between 5 and 6, so it includes all cases in its result.
52 # between 5 and 6, so it includes all cases in its result.
53 # Cases 1, 3, and 5 are then removed by _filter().
53 # Cases 1, 3, and 5 are then removed by _filter().
54
54
55 for k, v in list(t.items()):
55 for k, v in list(t.items()):
56 # remove copies from files that didn't exist
56 # remove copies from files that didn't exist
57 if v not in src:
57 if v not in src:
58 del t[k]
58 del t[k]
59 # remove criss-crossed copies
59 # remove criss-crossed copies
60 elif k in src and v in dst:
60 elif k in src and v in dst:
61 del t[k]
61 del t[k]
62 # remove copies to files that were then removed
62 # remove copies to files that were then removed
63 elif k not in dst:
63 elif k not in dst:
64 del t[k]
64 del t[k]
65
65
66
66
def _chain(a, b):
    """chain two sets of copies 'a' and 'b'

    Returns a new mapping in which every destination recorded in 'b' is
    resolved one step further back: when the source of a 'b' entry is
    itself a destination recorded so far, the entry points at that
    earlier origin instead.
    """
    chained = dict(a)
    for dest, source in pycompat.iteritems(b):
        # dict.get(source, source) follows the link when 'source' is a
        # known destination, and keeps 'source' as-is otherwise
        chained[dest] = chained.get(source, source)
    return chained
76
76
77
77
78 def _tracefile(fctx, am, basemf):
78 def _tracefile(fctx, am, basemf):
79 """return file context that is the ancestor of fctx present in ancestor
79 """return file context that is the ancestor of fctx present in ancestor
80 manifest am
80 manifest am
81
81
82 Note: we used to try and stop after a given limit, however checking if that
82 Note: we used to try and stop after a given limit, however checking if that
83 limit is reached turned out to be very expensive. we are better off
83 limit is reached turned out to be very expensive. we are better off
84 disabling that feature."""
84 disabling that feature."""
85
85
86 for f in fctx.ancestors():
86 for f in fctx.ancestors():
87 path = f.path()
87 path = f.path()
88 if am.get(path, None) == f.filenode():
88 if am.get(path, None) == f.filenode():
89 return path
89 return path
90 if basemf and basemf.get(path, None) == f.filenode():
90 if basemf and basemf.get(path, None) == f.filenode():
91 return path
91 return path
92
92
93
93
94 def _dirstatecopies(repo, match=None):
94 def _dirstatecopies(repo, match=None):
95 ds = repo.dirstate
95 ds = repo.dirstate
96 c = ds.copies().copy()
96 c = ds.copies().copy()
97 for k in list(c):
97 for k in list(c):
98 if ds[k] not in b'anm' or (match and not match(k)):
98 if ds[k] not in b'anm' or (match and not match(k)):
99 del c[k]
99 del c[k]
100 return c
100 return c
101
101
102
102
103 def _computeforwardmissing(a, b, match=None):
103 def _computeforwardmissing(a, b, match=None):
104 """Computes which files are in b but not a.
104 """Computes which files are in b but not a.
105 This is its own function so extensions can easily wrap this call to see what
105 This is its own function so extensions can easily wrap this call to see what
106 files _forwardcopies is about to process.
106 files _forwardcopies is about to process.
107 """
107 """
108 ma = a.manifest()
108 ma = a.manifest()
109 mb = b.manifest()
109 mb = b.manifest()
110 return mb.filesnotin(ma, match=match)
110 return mb.filesnotin(ma, match=match)
111
111
112
112
def usechangesetcentricalgo(repo):
    """Report whether changeset-centric copy algorithms should be used."""
    # sidedata storage always implies changeset-centric copies
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
120
120
121
121
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {dst-path: src-path} mapping of the copies found between
    ``a`` and ``b``.  ``base``, when not None, supplies an extra manifest
    that traced ancestors are also matched against; ``match`` limits the
    files considered.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    # dispatch to the changeset-centric implementation when configured
    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        # share one precomputed ancestry set across all traced files
        # (NOTE(review): assumes fctx.ancestors() honors _ancestrycontext —
        # confirm against context.py)
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            # 'start' is only bound under 'debug', matching this guard
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
179
179
180
180
181 def _revinfogetter(repo):
181 def _revinfogetter(repo):
182 """return a function that return multiple data given a <rev>"i
182 """return a function that return multiple data given a <rev>"i
183
183
184 * p1: revision number of first parent
184 * p1: revision number of first parent
185 * p2: revision number of first parent
185 * p2: revision number of first parent
186 * p1copies: mapping of copies from p1
186 * p1copies: mapping of copies from p1
187 * p2copies: mapping of copies from p2
187 * p2copies: mapping of copies from p2
188 * removed: a list of removed files
188 * removed: a list of removed files
189 """
189 """
190 cl = repo.changelog
190 cl = repo.changelog
191 parents = cl.parentrevs
191 parents = cl.parentrevs
192
192
193 def revinfo(rev):
193 def revinfo(rev):
194 p1, p2 = parents(rev)
194 p1, p2 = parents(rev)
195 ctx = repo[rev]
195 ctx = repo[rev]
196 p1copies, p2copies = ctx._copies
196 p1copies, p2copies = ctx._copies
197 removed = ctx.filesremoved()
197 removed = ctx.filesremoved()
198 return p1, p2, p1copies, p2copies, removed
198 return p1, p2, p1copies, p2copies, removed
199
199
200 return revinfo
200 return revinfo
201
201
202
202
def _changesetforwardcopies(a, b, match):
    """find {dst@b: src@a} copies using changeset-stored copy metadata

    ``a`` is expected to be an ancestor of ``b``.  Revisions between the
    two are walked in increasing revision-number order, chaining copy
    information along each parent->child edge.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    # NOTE: deliberately unfiltered; the walk below addresses the revisions
    # between a and b directly by number
    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    # build a parent-rev -> [child-revs] map restricted to the walked range
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)

    roots = set(children) - set(missingrevs)
    work = list(roots)
    all_copies = {r: {} for r in roots}
    # a min-heap processes revisions in increasing order, so every parent is
    # handled before any of its children
    heapq.heapify(work)
    alwaysmatch = match.always()
    while work:
        r = heapq.heappop(work)
        copies = all_copies.pop(r)
        if r == b.rev():
            return copies
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            # Copy the dict only if later iterations will also need it
            if i != len(children[r]) - 1:
                newcopies = copies.copy()
            else:
                newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
            for f in removed:
                if f in newcopies:
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                heapq.heappush(work, c)
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    # unreachable: b.rev() is part of the walked set, so the loop above is
    # expected to return via the 'r == b.rev()' branch
    assert False
278
278
279
279
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        return _committedforwardcopies(a, b, base, match)
    # b is the working copy: trace committed copies up to its first parent,
    # then layer the dirstate-recorded copies on top
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
294
294
295
295
def _backwardrenames(a, b, match):
    """compute renames going backward, by reversing _forwardcopies(b, a)

    Pure copies — entries whose source is still present in ``a`` — are
    skipped, so only renames remain.  ``match``, when given, restricts the
    entries kept (it is applied to the forward-copy source, which becomes
    a key of the reversed mapping).
    """
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename
    # situations can still exist (e.g. hg cp a b; hg mv a c). In those
    # cases we arbitrarily pick one of the renames.
    # We don't pass "match" into _forwardcopies(), since that would filter
    # the destination by it. Since we're reversing the copies, we want to
    # filter the source instead.
    forward = _forwardcopies(b, a)
    reversed_copies = {}
    for fwd_dst, fwd_src in sorted(pycompat.iteritems(forward)):
        if match and not match(fwd_src):
            continue
        if fwd_src in a:
            # source still exists in 'a': a copy, not a rename — drop it
            continue
        reversed_copies[fwd_src] = fwd_dst
    return reversed_copies
316
316
317
317
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: searching copies from %s to %s\n' % (x, y))
    if x == y or not x or not y:
        # trivial comparison: nothing can have been copied
        return {}
    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: trace forward
        if debug:
            dbg(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif anc == y:
        # y is an ancestor of x: trace backward
        if debug:
            dbg(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # unrelated sides: back from x to the common ancestor, then forward
        if debug:
            dbg(b'debug.copies: search mode: combined\n')
        base = x if anc.rev() != node.nullrev else None
        copies = _chain(
            _backwardrenames(x, anc, match=match),
            _forwardcopies(anc, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
352
352
353
353
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    | o        ---> 3 commit that modifies a.txt
    | /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # NOTE(review): narrowmatch comes from c1.repo() while the dirstate below
    # comes from the 'repo' argument — presumably the same repository; confirm
    # with callers.
    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
432
432
433
433
434 def _isfullcopytraceable(repo, c1, base):
434 def _isfullcopytraceable(repo, c1, base):
435 """ Checks that if base, source and destination are all no-public branches,
435 """ Checks that if base, source and destination are all no-public branches,
436 if yes let's use the full copytrace algorithm for increased capabilities
436 if yes let's use the full copytrace algorithm for increased capabilities
437 since it will be fast enough.
437 since it will be fast enough.
438
438
439 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
439 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
440 number of changesets from c1 to base such that if number of changesets are
440 number of changesets from c1 to base such that if number of changesets are
441 more than the limit, full copytracing algorithm won't be used.
441 more than the limit, full copytracing algorithm won't be used.
442 """
442 """
443 if c1.rev() is None:
443 if c1.rev() is None:
444 c1 = c1.p1()
444 c1 = c1.p1()
445 if c1.mutable() and base.mutable():
445 if c1.mutable() and base.mutable():
446 sourcecommitlimit = repo.ui.configint(
446 sourcecommitlimit = repo.ui.configint(
447 b'experimental', b'copytrace.sourcecommitlimit'
447 b'experimental', b'copytrace.sourcecommitlimit'
448 )
448 )
449 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
449 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
450 return commits < sourcecommitlimit
450 return commits < sourcecommitlimit
451 return False
451 return False
452
452
453
453
454 def _checksinglesidecopies(
454 def _checksinglesidecopies(
455 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
455 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
456 ):
456 ):
457 if src not in m2:
457 if src not in m2:
458 # deleted on side 2
458 # deleted on side 2
459 if src not in m1:
459 if src not in m1:
460 # renamed on side 1, deleted on side 2
460 # renamed on side 1, deleted on side 2
461 renamedelete[src] = dsts1
461 renamedelete[src] = dsts1
462 elif m2[src] != mb[src]:
462 elif m2[src] != mb[src]:
463 if not _related(c2[src], base[src]):
463 if not _related(c2[src], base[src]):
464 return
464 return
465 # modified on side 2
465 # modified on side 2
466 for dst in dsts1:
466 for dst in dsts1:
467 if dst not in m2:
467 if dst not in m2:
468 # dst not added on side 2 (handle as regular
468 # dst not added on side 2 (handle as regular
469 # "both created" case in manifestmerge otherwise)
469 # "both created" case in manifestmerge otherwise)
470 copy[dst] = src
470 copy[dst] = src
471
471
472
472
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple ``(copy, movewithdir, diverge, renamedelete, dirmove)``:
    - copy: {dst: src} copies to merge
    - movewithdir: {src: dst} files moved because their directory moved
    - diverge: {src: [dsts]} sources renamed to different names on each side
    - renamedelete: {src: [dsts]} renamed on one side, deleted on the other
    - dirmove: {srcdir/: dstdir/} detected whole-directory moves
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # per-side copy information relative to the merge base: {dst: src}
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert to {src: [dst, ...]} so we can reason about each copy source
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # flat sets of destinations, used only for debug annotations below
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    # files added on exactly one side; candidates for directory-move handling
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        # no copies at all: nothing further to detect
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    # drop any candidate later disqualified by one of the checks above
    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so prefix matching below is exact
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b" pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
637
637
638
638
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same 5-tuple shape as _fullcopytracing(); the last four
    elements are always empty dicts here.
    """

    # working contexts have rev() None; substitute their first parent so
    # revision-number based queries below work
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # walk c2's first-parent chain back to base, collecting touched files
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep recorded copies whose source still exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index files added on the c1 side by basename and by directory, the
        # two heuristics described in the docstring
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
746
746
747
747
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True  # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        # step past working-context file revisions (linkrev is None) before
        # entering the integer comparison loop below
        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        # advance whichever side has the higher linkrev until both point at
        # the same linkrev, then compare the file revisions themselves
        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else:  # f1 and f2 point to files in the same linkrev
                return f1 == f2  # true if they point to the same file
    except StopIteration:
        # one ancestor chain ran out before the linkrevs met: unrelated
        return False
779
779
780
780
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    # build the exclusion set unless copytracing is explicitly disabled:
    # 'heuristics', any true value, or an unparsable/unset value all enable it
    if skiprev is not None and (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    ):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst in exclude:
            continue
        # only mark copies whose destination exists in the working context
        if dst in wctx:
            wctx[dst].markcopied(src)
805
805
806
806
def computechangesetfilesadded(ctx):
    """Return the list of files added in a changeset.

    A file counts as added when the changeset touches it and no parent
    of the changeset contains it.
    """
    parents = ctx.parents()
    return [f for f in ctx.files() if not any(f in p for p in parents)]
815
815
816
816
def computechangesetfilesremoved(ctx):
    """Return the list of files removed in a changeset.

    A file counts as removed when the changeset touches it but the file
    is no longer present in the changeset itself.
    """
    return [f for f in ctx.files() if f not in ctx]
825
825
826
826
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries
    (p1copies, p2copies).

    Each dictionary is of the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # skip files outside the narrowspec and files removed by this
        # changeset (a removed dst has no filectx to query)
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        # attribute the copy to whichever parent holds the exact source
        # file revision recorded in the rename metadata
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies
851
851
852
852
def encodecopies(files, copies):
    """Encode a ``{dst: src}`` copy mapping against the ``files`` list.

    Each entry is serialized as ``<index-of-dst>\\0<src>``; entries are
    joined with newlines. Raises ProgrammingError if a copy destination
    is absent from ``files``.
    """
    items = [
        b'%d\0%s' % (idx, copies[name])
        for idx, name in enumerate(files)
        if name in copies
    ]
    # every destination must appear in `files`; otherwise entries were
    # silently skipped by the filter above
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)
863
863
864
864
def decodecopies(files, data):
    """Decode copy data produced by encodecopies().

    Returns a ``{dst: src}`` dict, or None when the payload cannot be
    parsed against ``files``.
    """
    copies = {}
    if not data:
        return copies
    try:
        for line in data.split(b'\n'):
            strindex, src = line.split(b'\0')
            copies[files[int(strindex)]] = src
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
    return copies
880
880
881
881
def encodefileindices(files, subset):
    """Encode ``subset`` as the newline-joined decimal indices of its
    members within the ``files`` list (in ``files`` order)."""
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % pos for pos, name in enumerate(files) if name in wanted
    )
889
889
890
890
def decodefileindices(files, data):
    """Decode data produced by encodefileindices().

    Returns the corresponding sublist of ``files``, or None when the
    payload is malformed or an index is out of range.
    """
    if not data:
        return []
    subset = []
    try:
        for token in data.split(b'\n'):
            pos = int(token)
            if not (0 <= pos < len(files)):
                # out-of-range index: treat the whole payload as invalid
                return None
            subset.append(files[pos])
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
    return subset
906
906
907
907
def _getsidedata(srcrepo, rev):
    """Compute the copy-tracing sidedata entries for changelog revision `rev`.

    Returns a dict mapping sidedata keys (SD_P1COPIES, SD_P2COPIES,
    SD_FILESADDED, SD_FILESREMOVED) to their encoded values; entries that
    encode to an empty payload are omitted.
    """
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    # NOTE(review): filescopies is always a 2-tuple, so this any() is always
    # true; the per-entry truthiness checks below do the real filtering.
    if any([filescopies, filesadded, filesremoved]):
        # all encodings are indices into the sorted list of touched files
        sortedfiles = sorted(ctx.files())
        p1copies, p2copies = filescopies
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
930
930
931
931
def getsidedataadder(srcrepo, destrepo):
    """Return a sidedata companion that attaches copy-tracing sidedata.

    The companion computes sidedata from ``srcrepo`` for changelog
    revisions only, and returns the ``(drop, prune, sidedata)`` triple
    expected by the revlog upgrade machinery.
    """

    def sidedatacompanion(revlog, rev):
        # only the changelog carries a `filteredrevs` attribute; other
        # revlogs get no sidedata
        if util.safehasattr(revlog, 'filteredrevs'):
            return False, (), _getsidedata(srcrepo, rev)
        return False, (), {}

    return sidedatacompanion
940
940
941
941
def getsidedataremover(srcrepo, destrepo):
    """Return a sidedata companion that strips copy-tracing sidedata.

    For changelog revisions flagged with REVIDX_SIDEDATA, the companion
    lists the four copy-tracing sidedata keys for removal; everything
    else is left untouched.
    """

    def sidedatacompanion(revlog, rev):
        toprune = ()
        # only the changelog carries a `filteredrevs` attribute
        if util.safehasattr(revlog, 'filteredrevs') and (
            revlog.flags(rev) & REVIDX_SIDEDATA
        ):
            toprune = (
                sidedatamod.SD_P1COPIES,
                sidedatamod.SD_P2COPIES,
                sidedatamod.SD_FILESADDED,
                sidedatamod.SD_FILESREMOVED,
            )
        return False, toprune, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now