upstream/mercurial-mirror Commit - r43527:0aef9177

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import collections

10

import collections

11

import heapq

11

import heapq

12

import os

12

import os

13

14

from .i18n import _

14

from .i18n import _

15

16

17

from .revlogutils.flagutil import REVIDX_SIDEDATA

17

from .revlogutils.flagutil import REVIDX_SIDEDATA

18

19

from . import (

19

from . import (

20

error,

20

error,

21

match as matchmod,

21

match as matchmod,

22

node,

22

node,

23

pathutil,

23

pathutil,

24

pycompat,

24

pycompat,

25

util,

25

util,

26

)

26

)

27

28

from .revlogutils import sidedata as sidedatamod

28

from .revlogutils import sidedata as sidedatamod

29

30

from .utils import stringutil

30

from .utils import stringutil

31

32

33

def _filter(src, dst, t):

33

def _filter(src, dst, t):

34

"""filters out invalid copies after chaining"""

34

"""filters out invalid copies after chaining"""

35

36

# When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')

36

# When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')

37

# with copies in 'b' (from 'mid' to 'dst'), we can get the different cases

37

# with copies in 'b' (from 'mid' to 'dst'), we can get the different cases

38

# in the following table (not including trivial cases). For example, case 2

38

# in the following table (not including trivial cases). For example, case 2

39

# is where a file existed in 'src' and remained under that name in 'mid' and

39

# is where a file existed in 'src' and remained under that name in 'mid' and

40

# then was renamed between 'mid' and 'dst'.

40

# then was renamed between 'mid' and 'dst'.

41

#

41

#

42

# case src mid dst result

42

# case src mid dst result

43

# 1 x y - -

43

# 1 x y - -

44

# 2 x y y x->y

44

# 2 x y y x->y

45

# 3 x y x -

45

# 3 x y x -

46

# 4 x y z x->z

46

# 4 x y z x->z

47

# 5 - x y -

47

# 5 - x y -

48

# 6 x x y x->y

48

# 6 x x y x->y

49

#

49

#

50

# _chain() takes care of chaining the copies in 'a' and 'b', but it

50

# _chain() takes care of chaining the copies in 'a' and 'b', but it

51

# cannot tell the difference between cases 1 and 2, between 3 and 4, or

51

# cannot tell the difference between cases 1 and 2, between 3 and 4, or

52

# between 5 and 6, so it includes all cases in its result.

52

# between 5 and 6, so it includes all cases in its result.

53

# Cases 1, 3, and 5 are then removed by _filter().

53

# Cases 1, 3, and 5 are then removed by _filter().

54

55

for k, v in list(t.items()):

55

for k, v in list(t.items()):

56

# remove copies from files that didn't exist

56

# remove copies from files that didn't exist

57

if v not in src:

57

if v not in src:

58

del t[k]

58

del t[k]

59

# remove criss-crossed copies

59

# remove criss-crossed copies

60

elif k in src and v in dst:

60

elif k in src and v in dst:

61

del t[k]

61

del t[k]

62

# remove copies to files that were then removed

62

# remove copies to files that were then removed

63

elif k not in dst:

63

elif k not in dst:

64

del t[k]

64

del t[k]

65

66

67

def _chain(a, b):

67

def _chain(a, b):

68

"""chain two sets of copies 'a' and 'b'"""

68

"""chain two sets of copies 'a' and 'b'"""

69

t = a.copy()

69

t = a.copy()

70

for k, v in pycompat.iteritems(b):

70

for k, v in pycompat.iteritems(b):

71

if v in t:

71

if v in t:

72

t[k] = t[v]

72

t[k] = t[v]

73

else:

73

else:

74

t[k] = v

74

t[k] = v

75

return t

75

return t

76

77

78

def _tracefile(fctx, am, basemf):

78

def _tracefile(fctx, am, basemf):

79

"""return file context that is the ancestor of fctx present in ancestor

79

"""return file context that is the ancestor of fctx present in ancestor

80

manifest am

80

manifest am

81

82

Note: we used to try and stop after a given limit, however checking if that

82

Note: we used to try and stop after a given limit, however checking if that

83

limit is reached turned out to be very expensive. we are better off

83

limit is reached turned out to be very expensive. we are better off

84

disabling that feature."""

84

disabling that feature."""

85

86

for f in fctx.ancestors():

86

for f in fctx.ancestors():

87

path = f.path()

87

path = f.path()

88

if am.get(path, None) == f.filenode():

88

if am.get(path, None) == f.filenode():

89

return path

89

return path

90

if basemf and basemf.get(path, None) == f.filenode():

90

if basemf and basemf.get(path, None) == f.filenode():

91

return path

91

return path

92

93

94

def _dirstatecopies(repo, match=None):

94

def _dirstatecopies(repo, match=None):

95

ds = repo.dirstate

95

ds = repo.dirstate

96

c = ds.copies().copy()

96

c = ds.copies().copy()

97

for k in list(c):

97

for k in list(c):

98

if ds[k] not in b'anm' or (match and not match(k)):

98

if ds[k] not in b'anm' or (match and not match(k)):

99

del c[k]

99

del c[k]

100

return c

100

return c

101

102

103

def _computeforwardmissing(a, b, match=None):

103

def _computeforwardmissing(a, b, match=None):

104

"""Computes which files are in b but not a.

104

"""Computes which files are in b but not a.

105

This is its own function so extensions can easily wrap this call to see what

105

This is its own function so extensions can easily wrap this call to see what

106

files _forwardcopies is about to process.

106

files _forwardcopies is about to process.

107

"""

107

"""

108

ma = a.manifest()

108

ma = a.manifest()

109

mb = b.manifest()

109

mb = b.manifest()

110

return mb.filesnotin(ma, match=match)

110

return mb.filesnotin(ma, match=match)

111

112

113

def usechangesetcentricalgo(repo):

113

def usechangesetcentricalgo(repo):

114

"""Checks if we should use changeset-centric copy algorithms"""

114

"""Checks if we should use changeset-centric copy algorithms"""

115

if repo.filecopiesmode == b'changeset-sidedata':

115

if repo.filecopiesmode == b'changeset-sidedata':

116

return True

116

return True

117

readfrom = repo.ui.config(b'experimental', b'copies.read-from')

117

readfrom = repo.ui.config(b'experimental', b'copies.read-from')

118

changesetsource = (b'changeset-only', b'compatibility')

118

changesetsource = (b'changeset-only', b'compatibility')

119

return readfrom in changesetsource

119

return readfrom in changesetsource

120

121

122

def _committedforwardcopies(a, b, base, match):

122

def _committedforwardcopies(a, b, base, match):

123

"""Like _forwardcopies(), but b.rev() cannot be None (working copy)"""

123

"""Like _forwardcopies(), but b.rev() cannot be None (working copy)"""

124

# files might have to be traced back to the fctx parent of the last

124

# files might have to be traced back to the fctx parent of the last

125

# one-side-only changeset, but not further back than that

125

# one-side-only changeset, but not further back than that

126

repo = a._repo

126

repo = a._repo

127

128

if usechangesetcentricalgo(repo):

128

if usechangesetcentricalgo(repo):

129

return _changesetforwardcopies(a, b, match)

129

return _changesetforwardcopies(a, b, match)

130

131

debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

131

debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

132

dbg = repo.ui.debug

132

dbg = repo.ui.debug

133

if debug:

133

if debug:

134

dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))

134

dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))

135

am = a.manifest()

135

am = a.manifest()

136

basemf = None if base is None else base.manifest()

136

basemf = None if base is None else base.manifest()

137

138

# find where new files came from

138

# find where new files came from

139

# we currently don't try to find where old files went, too expensive

139

# we currently don't try to find where old files went, too expensive

140

# this means we can miss a case like 'hg rm b; hg cp a b'

140

# this means we can miss a case like 'hg rm b; hg cp a b'

141

cm = {}

141

cm = {}

142

143

# Computing the forward missing is quite expensive on large manifests, since

143

# Computing the forward missing is quite expensive on large manifests, since

144

# it compares the entire manifests. We can optimize it in the common use

144

# it compares the entire manifests. We can optimize it in the common use

145

# case of computing what copies are in a commit versus its parent (like

145

# case of computing what copies are in a commit versus its parent (like

146

# during a rebase or histedit). Note, we exclude merge commits from this

146

# during a rebase or histedit). Note, we exclude merge commits from this

147

# optimization, since the ctx.files() for a merge commit is not correct for

147

# optimization, since the ctx.files() for a merge commit is not correct for

148

# this comparison.

148

# this comparison.

149

forwardmissingmatch = match

149

forwardmissingmatch = match

150

if b.p1() == a and b.p2().node() == node.nullid:

150

if b.p1() == a and b.p2().node() == node.nullid:

151

filesmatcher = matchmod.exact(b.files())

151

filesmatcher = matchmod.exact(b.files())

152

forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)

152

forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)

153

missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

153

missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

154

155

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

155

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

156

157

if debug:

157

if debug:

158

dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

158

dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

159

160

for f in sorted(missing):

160

for f in sorted(missing):

161

if debug:

161

if debug:

162

dbg(b'debug.copies: tracing file: %s\n' % f)

162

dbg(b'debug.copies: tracing file: %s\n' % f)

163

fctx = b[f]

163

fctx = b[f]

164

fctx._ancestrycontext = ancestrycontext

164

fctx._ancestrycontext = ancestrycontext

165

166

if debug:

166

if debug:

167

start = util.timer()

167

start = util.timer()

168

opath = _tracefile(fctx, am, basemf)

168

opath = _tracefile(fctx, am, basemf)

169

if opath:

169

if opath:

170

if debug:

170

if debug:

171

dbg(b'debug.copies: rename of: %s\n' % opath)

171

dbg(b'debug.copies: rename of: %s\n' % opath)

172

cm[f] = opath

172

cm[f] = opath

173

if debug:

173

if debug:

174

dbg(

174

dbg(

175

b'debug.copies: time: %f seconds\n'

175

b'debug.copies: time: %f seconds\n'

176

% (util.timer() - start)

176

% (util.timer() - start)

177

)

177

)

178

return cm

178

return cm

179

180

181

def _changesetforwardcopies(a, b, match):

181

def _changesetforwardcopies(a, b, match):

182

if a.rev() in (node.nullrev, b.rev()):

182

if a.rev() in (node.nullrev, b.rev()):

183

return {}

183

return {}

184

185

repo = a.repo()

185

repo = a.repo()

186

children = {}

186

children = {}

187

cl = repo.changelog

187

cl = repo.changelog

188

missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])

188

missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])

189

for r in missingrevs:

189

for r in missingrevs:

190

for p in cl.parentrevs(r):

190

for p in cl.parentrevs(r):

191

if p == node.nullrev:

191

if p == node.nullrev:

192

continue

192

continue

193

if p not in children:

193

if p not in children:

194

children[p] = [r]

194

children[p] = [r]

195

else:

195

else:

196

children[p].append(r)

196

children[p].append(r)

197

198

roots = set(children) - set(missingrevs)

198

roots = set(children) - set(missingrevs)

199

# 'work' contains 3-tuples of a (revision number, parent number, copies).

199

work = list(roots)

200

# The parent number is only used for knowing which parent the copies dict

200

all_copies = {r: {} for r in roots}

201

# came from.

202

# NOTE: To reduce costly copying the 'copies' dicts, we reuse the same

203

# instance for *one* of the child nodes (the last one). Once an instance

204

# has been put on the queue, it is thus no longer safe to modify it.

205

# Conversely, it *is* safe to modify an instance popped off the queue.

206

work = [(r, 1, {}) for r in roots]

207

heapq.heapify(work)

201

heapq.heapify(work)

208

alwaysmatch = match.always()

202

alwaysmatch = match.always()

209

while work:

203

while work:

210

r, i1, ~~copies~~ = heapq.heappop(work)

204

r = heapq.heappop(work)

211

if work and work[0][0] == r:

205

copies = all_copies.pop(r)

212

# We are tracing copies from both parents

213

r, i2, copies2 = heapq.heappop(work)

214

for dst, src in copies2.items():

215

# Unlike when copies are stored in the filelog, we consider

216

# it a copy even if the destination already existed on the

217

# other branch. It's simply too expensive to check if the

218

# file existed in the manifest.

219

if dst not in copies:

220

# If it was copied on the p1 side, leave it as copied from

221

# that side, even if it was also copied on the p2 side.

222

copies[dst] = copies2[dst]

223

if r == b.rev():

206

if r == b.rev():

224

return copies

207

return copies

225

for i, c in enumerate(children[r]):

208

for i, c in enumerate(children[r]):

226

childctx = repo[c]

209

childctx = repo[c]

227

if r == childctx.p1().rev():

210

if r == childctx.p1().rev():

228

parent = 1

211

parent = 1

229

childcopies = childctx.p1copies()

212

childcopies = childctx.p1copies()

230

else:

213

else:

231

assert r == childctx.p2().rev()

214

assert r == childctx.p2().rev()

232

parent = 2

215

parent = 2

233

childcopies = childctx.p2copies()

216

childcopies = childctx.p2copies()

234

if not alwaysmatch:

217

if not alwaysmatch:

235

childcopies = {

218

childcopies = {

236

dst: src for dst, src in childcopies.items() if match(dst)

219

dst: src for dst, src in childcopies.items() if match(dst)

237

}

220

}

238

# Copy the dict only if later iterations will also need it

221

# Copy the dict only if later iterations will also need it

239

if i != len(children[r]) - 1:

222

if i != len(children[r]) - 1:

240

newcopies = copies.copy()

223

newcopies = copies.copy()

241

else:

224

else:

242

newcopies = copies

225

newcopies = copies

243

if childcopies:

226

if childcopies:

244

newcopies = _chain(newcopies, childcopies)

227

newcopies = _chain(newcopies, childcopies)

245

for f in childctx.filesremoved():

228

for f in childctx.filesremoved():

246

if f in newcopies:

229

if f in newcopies:

247

del newcopies[f]

230

del newcopies[f]

248

heapq.heappush(work, (c, parent, newcopies))

231

othercopies = all_copies.get(c)

232

if othercopies is None:

233

heapq.heappush(work, c)

234

all_copies[c] = newcopies

235

else:

236

# we are the second parent to work on c, we need to merge our

237

# work with the other.

238

#

239

# Unlike when copies are stored in the filelog, we consider

240

# it a copy even if the destination already existed on the

241

# other branch. It's simply too expensive to check if the

242

# file existed in the manifest.

243

#

244

# In case of conflict, parent 1 take precedence over parent 2.

245

# This is an arbitrary choice made anew when implementing

246

# changeset based copies. It was made without regards with

247

# potential filelog related behavior.

248

if parent == 1:

249

othercopies.update(newcopies)

250

else:

251

newcopies.update(othercopies)

252

all_copies[c] = newcopies

249

assert False

253

assert False

250

254

251

255

252

def _forwardcopies(a, b, base=None, match=None):

256

def _forwardcopies(a, b, base=None, match=None):

253

"""find {dst@b: src@a} copy mapping where a is an ancestor of b"""

257

"""find {dst@b: src@a} copy mapping where a is an ancestor of b"""

254

258

255

if base is None:

259

if base is None:

256

base = a

260

base = a

257

match = a.repo().narrowmatch(match)

261

match = a.repo().narrowmatch(match)

258

# check for working copy

262

# check for working copy

259

if b.rev() is None:

263

if b.rev() is None:

260

cm = _committedforwardcopies(a, b.p1(), base, match)

264

cm = _committedforwardcopies(a, b.p1(), base, match)

261

# combine copies from dirstate if necessary

265

# combine copies from dirstate if necessary

262

copies = _chain(cm, _dirstatecopies(b._repo, match))

266

copies = _chain(cm, _dirstatecopies(b._repo, match))

263

else:

267

else:

264

copies = _committedforwardcopies(a, b, base, match)

268

copies = _committedforwardcopies(a, b, base, match)

265

return copies

269

return copies

266

270

267

271

268

def _backwardrenames(a, b, match):

272

def _backwardrenames(a, b, match):

269

if a._repo.ui.config(b'experimental', b'copytrace') == b'off':

273

if a._repo.ui.config(b'experimental', b'copytrace') == b'off':

270

return {}

274

return {}

271

275

272

# Even though we're not taking copies into account, 1:n rename situations

276

# Even though we're not taking copies into account, 1:n rename situations

273

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

277

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

274

# arbitrarily pick one of the renames.

278

# arbitrarily pick one of the renames.

275

# We don't want to pass in "match" here, since that would filter

279

# We don't want to pass in "match" here, since that would filter

276

# the destination by it. Since we're reversing the copies, we want

280

# the destination by it. Since we're reversing the copies, we want

277

# to filter the source instead.

281

# to filter the source instead.

278

f = _forwardcopies(b, a)

282

f = _forwardcopies(b, a)

279

r = {}

283

r = {}

280

for k, v in sorted(pycompat.iteritems(f)):

284

for k, v in sorted(pycompat.iteritems(f)):

281

if match and not match(v):

285

if match and not match(v):

282

continue

286

continue

283

# remove copies

287

# remove copies

284

if v in a:

288

if v in a:

285

continue

289

continue

286

r[v] = k

290

r[v] = k

287

return r

291

return r

288

292

289

293

290

def pathcopies(x, y, match=None):

294

def pathcopies(x, y, match=None):

291

"""find {dst@y: src@x} copy mapping for directed compare"""

295

"""find {dst@y: src@x} copy mapping for directed compare"""

292

repo = x._repo

296

repo = x._repo

293

debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

297

debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

294

if debug:

298

if debug:

295

repo.ui.debug(

299

repo.ui.debug(

296

b'debug.copies: searching copies from %s to %s\n' % (x, y)

300

b'debug.copies: searching copies from %s to %s\n' % (x, y)

297

)

301

)

298

if x == y or not x or not y:

302

if x == y or not x or not y:

299

return {}

303

return {}

300

a = y.ancestor(x)

304

a = y.ancestor(x)

301

if a == x:

305

if a == x:

302

if debug:

306

if debug:

303

repo.ui.debug(b'debug.copies: search mode: forward\n')

307

repo.ui.debug(b'debug.copies: search mode: forward\n')

304

if y.rev() is None and x == y.p1():

308

if y.rev() is None and x == y.p1():

305

# short-circuit to avoid issues with merge states

309

# short-circuit to avoid issues with merge states

306

return _dirstatecopies(repo, match)

310

return _dirstatecopies(repo, match)

307

copies = _forwardcopies(x, y, match=match)

311

copies = _forwardcopies(x, y, match=match)

308

elif a == y:

312

elif a == y:

309

if debug:

313

if debug:

310

repo.ui.debug(b'debug.copies: search mode: backward\n')

314

repo.ui.debug(b'debug.copies: search mode: backward\n')

311

copies = _backwardrenames(x, y, match=match)

315

copies = _backwardrenames(x, y, match=match)

312

else:

316

else:

313

if debug:

317

if debug:

314

repo.ui.debug(b'debug.copies: search mode: combined\n')

318

repo.ui.debug(b'debug.copies: search mode: combined\n')

315

base = None

319

base = None

316

if a.rev() != node.nullrev:

320

if a.rev() != node.nullrev:

317

base = x

321

base = x

318

copies = _chain(

322

copies = _chain(

319

_backwardrenames(x, a, match=match),

323

_backwardrenames(x, a, match=match),

320

_forwardcopies(a, y, base, match=match),

324

_forwardcopies(a, y, base, match=match),

321

)

325

)

322

_filter(x, y, copies)

326

_filter(x, y, copies)

323

return copies

327

return copies

324

328

325

329

326

def mergecopies(repo, c1, c2, base):

330

def mergecopies(repo, c1, c2, base):

327

"""

331

"""

328

Finds moves and copies between context c1 and c2 that are relevant for

332

Finds moves and copies between context c1 and c2 that are relevant for

329

merging. 'base' will be used as the merge base.

333

merging. 'base' will be used as the merge base.

330

334

331

Copytracing is used in commands like rebase, merge, unshelve, etc to merge

335

Copytracing is used in commands like rebase, merge, unshelve, etc to merge

332

files that were moved/ copied in one merge parent and modified in another.

336

files that were moved/ copied in one merge parent and modified in another.

333

For example:

337

For example:

334

338

335

o ---> 4 another commit

339

o ---> 4 another commit

336

|

340

|

337

| o ---> 3 commit that modifies a.txt

341

| o ---> 3 commit that modifies a.txt

338

| /

342

| /

339

o / ---> 2 commit that moves a.txt to b.txt

343

o / ---> 2 commit that moves a.txt to b.txt

340

|/

344

|/

341

o ---> 1 merge base

345

o ---> 1 merge base

342

346

343

If we try to rebase revision 3 on revision 4, since there is no a.txt in

347

If we try to rebase revision 3 on revision 4, since there is no a.txt in

344

revision 4, and if user have copytrace disabled, we prints the following

348

revision 4, and if user have copytrace disabled, we prints the following

345

message:

349

message:

346

350

347

```other changed <file> which local deleted```

351

```other changed <file> which local deleted```

348

352

349

Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and

353

Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and

350

"dirmove".

354

"dirmove".

351

355

352

"copy" is a mapping from destination name -> source name,

356

"copy" is a mapping from destination name -> source name,

353

where source is in c1 and destination is in c2 or vice-versa.

357

where source is in c1 and destination is in c2 or vice-versa.

354

358

355

"movewithdir" is a mapping from source name -> destination name,

359

"movewithdir" is a mapping from source name -> destination name,

356

where the file at source present in one context but not the other

360

where the file at source present in one context but not the other

357

needs to be moved to destination by the merge process, because the

361

needs to be moved to destination by the merge process, because the

358

other context moved the directory it is in.

362

other context moved the directory it is in.

359

363

360

"diverge" is a mapping of source name -> list of destination names

364

"diverge" is a mapping of source name -> list of destination names

361

for divergent renames.

365

for divergent renames.

362

366

363

"renamedelete" is a mapping of source name -> list of destination

367

"renamedelete" is a mapping of source name -> list of destination

364

names for files deleted in c1 that were renamed in c2 or vice-versa.

368

names for files deleted in c1 that were renamed in c2 or vice-versa.

365

369

366

"dirmove" is a mapping of detected source dir -> destination dir renames.

370

"dirmove" is a mapping of detected source dir -> destination dir renames.

367

This is needed for handling changes to new files previously grafted into

371

This is needed for handling changes to new files previously grafted into

368

renamed directories.

372

renamed directories.

369

373

370

This function calls different copytracing algorithms based on config.

374

This function calls different copytracing algorithms based on config.

371

"""

375

"""

372

# avoid silly behavior for update from empty dir

376

# avoid silly behavior for update from empty dir

373

if not c1 or not c2 or c1 == c2:

377

if not c1 or not c2 or c1 == c2:

374

return {}, {}, {}, {}, {}

378

return {}, {}, {}, {}, {}

375

379

376

narrowmatch = c1.repo().narrowmatch()

380

narrowmatch = c1.repo().narrowmatch()

377

381

378

# avoid silly behavior for parent -> working dir

382

# avoid silly behavior for parent -> working dir

379

if c2.node() is None and c1.node() == repo.dirstate.p1():

383

if c2.node() is None and c1.node() == repo.dirstate.p1():

380

return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

384

return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

381

385

382

copytracing = repo.ui.config(b'experimental', b'copytrace')

386

copytracing = repo.ui.config(b'experimental', b'copytrace')

383

if stringutil.parsebool(copytracing) is False:

387

if stringutil.parsebool(copytracing) is False:

384

# stringutil.parsebool() returns None when it is unable to parse the

388

# stringutil.parsebool() returns None when it is unable to parse the

385

# value, so we should rely on making sure copytracing is on such cases

389

# value, so we should rely on making sure copytracing is on such cases

386

return {}, {}, {}, {}, {}

390

return {}, {}, {}, {}, {}

387

391

388

if usechangesetcentricalgo(repo):

392

if usechangesetcentricalgo(repo):

389

# The heuristics don't make sense when we need changeset-centric algos

393

# The heuristics don't make sense when we need changeset-centric algos

390

return _fullcopytracing(repo, c1, c2, base)

394

return _fullcopytracing(repo, c1, c2, base)

391

395

392

# Copy trace disabling is explicitly below the node == p1 logic above

396

# Copy trace disabling is explicitly below the node == p1 logic above

393

# because the logic above is required for a simple copy to be kept across a

397

# because the logic above is required for a simple copy to be kept across a

394

# rebase.

398

# rebase.

395

if copytracing == b'heuristics':

399

if copytracing == b'heuristics':

396

# Do full copytracing if only non-public revisions are involved as

400

# Do full copytracing if only non-public revisions are involved as

397

# that will be fast enough and will also cover the copies which could

401

# that will be fast enough and will also cover the copies which could

398

# be missed by heuristics

402

# be missed by heuristics

399

if _isfullcopytraceable(repo, c1, base):

403

if _isfullcopytraceable(repo, c1, base):

400

return _fullcopytracing(repo, c1, c2, base)

404

return _fullcopytracing(repo, c1, c2, base)

401

return _heuristicscopytracing(repo, c1, c2, base)

405

return _heuristicscopytracing(repo, c1, c2, base)

402

else:

406

else:

403

return _fullcopytracing(repo, c1, c2, base)

407

return _fullcopytracing(repo, c1, c2, base)

404

408

405

409

406

def _isfullcopytraceable(repo, c1, base):

410

def _isfullcopytraceable(repo, c1, base):

407

""" Checks that if base, source and destination are all no-public branches,

411

""" Checks that if base, source and destination are all no-public branches,

408

if yes let's use the full copytrace algorithm for increased capabilities

412

if yes let's use the full copytrace algorithm for increased capabilities

409

since it will be fast enough.

413

since it will be fast enough.

410

414

411

`experimental.copytrace.sourcecommitlimit` can be used to set a limit for

415

`experimental.copytrace.sourcecommitlimit` can be used to set a limit for

412

number of changesets from c1 to base such that if number of changesets are

416

number of changesets from c1 to base such that if number of changesets are

413

more than the limit, full copytracing algorithm won't be used.

417

more than the limit, full copytracing algorithm won't be used.

414

"""

418

"""

415

if c1.rev() is None:

419

if c1.rev() is None:

416

c1 = c1.p1()

420

c1 = c1.p1()

417

if c1.mutable() and base.mutable():

421

if c1.mutable() and base.mutable():

418

sourcecommitlimit = repo.ui.configint(

422

sourcecommitlimit = repo.ui.configint(

419

b'experimental', b'copytrace.sourcecommitlimit'

423

b'experimental', b'copytrace.sourcecommitlimit'

420

)

424

)

421

commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))

425

commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))

422

return commits < sourcecommitlimit

426

return commits < sourcecommitlimit

423

return False

427

return False

424

428

425

429

426

def _checksinglesidecopies(

430

def _checksinglesidecopies(

427

src, dsts1, m1, m2, mb, c2, base, copy, renamedelete

431

src, dsts1, m1, m2, mb, c2, base, copy, renamedelete

428

):

432

):

429

if src not in m2:

433

if src not in m2:

430

# deleted on side 2

434

# deleted on side 2

431

if src not in m1:

435

if src not in m1:

432

# renamed on side 1, deleted on side 2

436

# renamed on side 1, deleted on side 2

433

renamedelete[src] = dsts1

437

renamedelete[src] = dsts1

434

elif m2[src] != mb[src]:

438

elif m2[src] != mb[src]:

435

if not _related(c2[src], base[src]):

439

if not _related(c2[src], base[src]):

436

return

440

return

437

# modified on side 2

441

# modified on side 2

438

for dst in dsts1:

442

for dst in dsts1:

439

if dst not in m2:

443

if dst not in m2:

440

# dst not added on side 2 (handle as regular

444

# dst not added on side 2 (handle as regular

441

# "both created" case in manifestmerge otherwise)

445

# "both created" case in manifestmerge otherwise)

442

copy[dst] = src

446

copy[dst] = src

443

447

444

448

445

def _fullcopytracing(repo, c1, c2, base):

449

def _fullcopytracing(repo, c1, c2, base):

446

""" The full copytracing algorithm which finds all the new files that were

450

""" The full copytracing algorithm which finds all the new files that were

447

added from merge base up to the top commit and for each file it checks if

451

added from merge base up to the top commit and for each file it checks if

448

this file was copied from another file.

452

this file was copied from another file.

449

453

450

This is pretty slow when a lot of changesets are involved but will track all

454

This is pretty slow when a lot of changesets are involved but will track all

451

the copies.

455

the copies.

452

"""

456

"""

453

m1 = c1.manifest()

457

m1 = c1.manifest()

454

m2 = c2.manifest()

458

m2 = c2.manifest()

455

mb = base.manifest()

459

mb = base.manifest()

456

460

457

copies1 = pathcopies(base, c1)

461

copies1 = pathcopies(base, c1)

458

copies2 = pathcopies(base, c2)

462

copies2 = pathcopies(base, c2)

459

463

460

inversecopies1 = {}

464

inversecopies1 = {}

461

inversecopies2 = {}

465

inversecopies2 = {}

462

for dst, src in copies1.items():

466

for dst, src in copies1.items():

463

inversecopies1.setdefault(src, []).append(dst)

467

inversecopies1.setdefault(src, []).append(dst)

464

for dst, src in copies2.items():

468

for dst, src in copies2.items():

465

inversecopies2.setdefault(src, []).append(dst)

469

inversecopies2.setdefault(src, []).append(dst)

466

470

467

copy = {}

471

copy = {}

468

diverge = {}

472

diverge = {}

469

renamedelete = {}

473

renamedelete = {}

470

allsources = set(inversecopies1) | set(inversecopies2)

474

allsources = set(inversecopies1) | set(inversecopies2)

471

for src in allsources:

475

for src in allsources:

472

dsts1 = inversecopies1.get(src)

476

dsts1 = inversecopies1.get(src)

473

dsts2 = inversecopies2.get(src)

477

dsts2 = inversecopies2.get(src)

474

if dsts1 and dsts2:

478

if dsts1 and dsts2:

475

# copied/renamed on both sides

479

# copied/renamed on both sides

476

if src not in m1 and src not in m2:

480

if src not in m1 and src not in m2:

477

# renamed on both sides

481

# renamed on both sides

478

dsts1 = set(dsts1)

482

dsts1 = set(dsts1)

479

dsts2 = set(dsts2)

483

dsts2 = set(dsts2)

480

# If there's some overlap in the rename destinations, we

484

# If there's some overlap in the rename destinations, we

481

# consider it not divergent. For example, if side 1 copies 'a'

485

# consider it not divergent. For example, if side 1 copies 'a'

482

# to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'

486

# to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'

483

# and 'd' and deletes 'a'.

487

# and 'd' and deletes 'a'.

484

if dsts1 & dsts2:

488

if dsts1 & dsts2:

485

for dst in dsts1 & dsts2:

489

for dst in dsts1 & dsts2:

486

copy[dst] = src

490

copy[dst] = src

487

else:

491

else:

488

diverge[src] = sorted(dsts1 | dsts2)

492

diverge[src] = sorted(dsts1 | dsts2)

489

elif src in m1 and src in m2:

493

elif src in m1 and src in m2:

490

# copied on both sides

494

# copied on both sides

491

dsts1 = set(dsts1)

495

dsts1 = set(dsts1)

492

dsts2 = set(dsts2)

496

dsts2 = set(dsts2)

493

for dst in dsts1 & dsts2:

497

for dst in dsts1 & dsts2:

494

copy[dst] = src

498

copy[dst] = src

495

# TODO: Handle cases where it was renamed on one side and copied

499

# TODO: Handle cases where it was renamed on one side and copied

496

# on the other side

500

# on the other side

497

elif dsts1:

501

elif dsts1:

498

# copied/renamed only on side 1

502

# copied/renamed only on side 1

499

_checksinglesidecopies(

503

_checksinglesidecopies(

500

src, dsts1, m1, m2, mb, c2, base, copy, renamedelete

504

src, dsts1, m1, m2, mb, c2, base, copy, renamedelete

501

)

505

)

502

elif dsts2:

506

elif dsts2:

503

# copied/renamed only on side 2

507

# copied/renamed only on side 2

504

_checksinglesidecopies(

508

_checksinglesidecopies(

505

src, dsts2, m2, m1, mb, c1, base, copy, renamedelete

509

src, dsts2, m2, m1, mb, c1, base, copy, renamedelete

506

)

510

)

507

511

508

renamedeleteset = set()

512

renamedeleteset = set()

509

divergeset = set()

513

divergeset = set()

510

for dsts in diverge.values():

514

for dsts in diverge.values():

511

divergeset.update(dsts)

515

divergeset.update(dsts)

512

for dsts in renamedelete.values():

516

for dsts in renamedelete.values():

513

renamedeleteset.update(dsts)

517

renamedeleteset.update(dsts)

514

518

515

# find interesting file sets from manifests

519

# find interesting file sets from manifests

516

addedinm1 = m1.filesnotin(mb, repo.narrowmatch())

520

addedinm1 = m1.filesnotin(mb, repo.narrowmatch())

517

addedinm2 = m2.filesnotin(mb, repo.narrowmatch())

521

addedinm2 = m2.filesnotin(mb, repo.narrowmatch())

518

u1 = sorted(addedinm1 - addedinm2)

522

u1 = sorted(addedinm1 - addedinm2)

519

u2 = sorted(addedinm2 - addedinm1)

523

u2 = sorted(addedinm2 - addedinm1)

520

524

521

header = b" unmatched files in %s"

525

header = b" unmatched files in %s"

522

if u1:

526

if u1:

523

repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))

527

repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))

524

if u2:

528

if u2:

525

repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

529

repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

526

530

527

fullcopy = copies1.copy()

531

fullcopy = copies1.copy()

528

fullcopy.update(copies2)

532

fullcopy.update(copies2)

529

if not fullcopy:

533

if not fullcopy:

530

return copy, {}, diverge, renamedelete, {}

534

return copy, {}, diverge, renamedelete, {}

531

535

532

if repo.ui.debugflag:

536

if repo.ui.debugflag:

533

repo.ui.debug(

537

repo.ui.debug(

534

b" all copies found (* = to merge, ! = divergent, "

538

b" all copies found (* = to merge, ! = divergent, "

535

b"% = renamed and deleted):\n"

539

b"% = renamed and deleted):\n"

536

)

540

)

537

for f in sorted(fullcopy):

541

for f in sorted(fullcopy):

538

note = b""

542

note = b""

539

if f in copy:

543

if f in copy:

540

note += b"*"

544

note += b"*"

541

if f in divergeset:

545

if f in divergeset:

542

note += b"!"

546

note += b"!"

543

if f in renamedeleteset:

547

if f in renamedeleteset:

544

note += b"%"

548

note += b"%"

545

repo.ui.debug(

549

repo.ui.debug(

546

b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)

550

b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)

547

)

551

)

548

del divergeset

552

del divergeset

549

553

550

repo.ui.debug(b" checking for directory renames\n")

554

repo.ui.debug(b" checking for directory renames\n")

551

555

552

# generate a directory move map

556

# generate a directory move map

553

d1, d2 = c1.dirs(), c2.dirs()

557

d1, d2 = c1.dirs(), c2.dirs()

554

invalid = set()

558

invalid = set()

555

dirmove = {}

559

dirmove = {}

556

560

557

# examine each file copy for a potential directory move, which is

561

# examine each file copy for a potential directory move, which is

558

# when all the files in a directory are moved to a new directory

562

# when all the files in a directory are moved to a new directory

559

for dst, src in pycompat.iteritems(fullcopy):

563

for dst, src in pycompat.iteritems(fullcopy):

560

dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)

564

dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)

561

if dsrc in invalid:

565

if dsrc in invalid:

562

# already seen to be uninteresting

566

# already seen to be uninteresting

563

continue

567

continue

564

elif dsrc in d1 and ddst in d1:

568

elif dsrc in d1 and ddst in d1:

565

# directory wasn't entirely moved locally

569

# directory wasn't entirely moved locally

566

invalid.add(dsrc)

570

invalid.add(dsrc)

567

elif dsrc in d2 and ddst in d2:

571

elif dsrc in d2 and ddst in d2:

568

# directory wasn't entirely moved remotely

572

# directory wasn't entirely moved remotely

569

invalid.add(dsrc)

573

invalid.add(dsrc)

570

elif dsrc in dirmove and dirmove[dsrc] != ddst:

574

elif dsrc in dirmove and dirmove[dsrc] != ddst:

571

# files from the same directory moved to two different places

575

# files from the same directory moved to two different places

572

invalid.add(dsrc)

576

invalid.add(dsrc)

573

else:

577

else:

574

# looks good so far

578

# looks good so far

575

dirmove[dsrc] = ddst

579

dirmove[dsrc] = ddst

576

580

577

for i in invalid:

581

for i in invalid:

578

if i in dirmove:

582

if i in dirmove:

579

del dirmove[i]

583

del dirmove[i]

580

del d1, d2, invalid

584

del d1, d2, invalid

581

585

582

if not dirmove:

586

if not dirmove:

583

return copy, {}, diverge, renamedelete, {}

587

return copy, {}, diverge, renamedelete, {}

584

588

585

dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

589

dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

586

590

587

for d in dirmove:

591

for d in dirmove:

588

repo.ui.debug(

592

repo.ui.debug(

589

b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])

593

b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])

590

)

594

)

591

595

592

movewithdir = {}

596

movewithdir = {}

593

# check unaccounted nonoverlapping files against directory moves

597

# check unaccounted nonoverlapping files against directory moves

594

for f in u1 + u2:

598

for f in u1 + u2:

595

if f not in fullcopy:

599

if f not in fullcopy:

596

for d in dirmove:

600

for d in dirmove:

597

if f.startswith(d):

601

if f.startswith(d):

598

# new file added in a directory that was moved, move it

602

# new file added in a directory that was moved, move it

599

df = dirmove[d] + f[len(d) :]

603

df = dirmove[d] + f[len(d) :]

600

if df not in copy:

604

if df not in copy:

601

movewithdir[f] = df

605

movewithdir[f] = df

602

repo.ui.debug(

606

repo.ui.debug(

603

b" pending file src: '%s' -> dst: '%s'\n"

607

b" pending file src: '%s' -> dst: '%s'\n"

604

% (f, df)

608

% (f, df)

605

)

609

)

606

break

610

break

607

611

608

return copy, movewithdir, diverge, renamedelete, dirmove

612

return copy, movewithdir, diverge, renamedelete, dirmove

609

613

610

614

611

def _heuristicscopytracing(repo, c1, c2, base):

615

def _heuristicscopytracing(repo, c1, c2, base):

612

""" Fast copytracing using filename heuristics

616

""" Fast copytracing using filename heuristics

613

617

614

Assumes that moves or renames are of following two types:

618

Assumes that moves or renames are of following two types:

615

619

616

1) Inside a directory only (same directory name but different filenames)

620

1) Inside a directory only (same directory name but different filenames)

617

2) Move from one directory to another

621

2) Move from one directory to another

618

(same filenames but different directory names)

622

(same filenames but different directory names)

619

623

620

Works only when there are no merge commits in the "source branch".

624

Works only when there are no merge commits in the "source branch".

621

Source branch is commits from base up to c2 not including base.

625

Source branch is commits from base up to c2 not including base.

622

626

623

If merge is involved it fallbacks to _fullcopytracing().

627

If merge is involved it fallbacks to _fullcopytracing().

624

628

625

Can be used by setting the following config:

629

Can be used by setting the following config:

626

630

627

[experimental]

631

[experimental]

628

copytrace = heuristics

632

copytrace = heuristics

629

633

630

In some cases the copy/move candidates found by heuristics can be very large

634

In some cases the copy/move candidates found by heuristics can be very large

631

in number and that will make the algorithm slow. The number of possible

635

in number and that will make the algorithm slow. The number of possible

632

candidates to check can be limited by using the config

636

candidates to check can be limited by using the config

633

`experimental.copytrace.movecandidateslimit` which defaults to 100.

637

`experimental.copytrace.movecandidateslimit` which defaults to 100.

634

"""

638

"""

635

639

636

if c1.rev() is None:

640

if c1.rev() is None:

637

c1 = c1.p1()

641

c1 = c1.p1()

638

if c2.rev() is None:

642

if c2.rev() is None:

639

c2 = c2.p1()

643

c2 = c2.p1()

640

644

641

copies = {}

645

copies = {}

642

646

643

changedfiles = set()

647

changedfiles = set()

644

m1 = c1.manifest()

648

m1 = c1.manifest()

645

if not repo.revs(b'%d::%d', base.rev(), c2.rev()):

649

if not repo.revs(b'%d::%d', base.rev(), c2.rev()):

646

# If base is not in c2 branch, we switch to fullcopytracing

650

# If base is not in c2 branch, we switch to fullcopytracing

647

repo.ui.debug(

651

repo.ui.debug(

648

b"switching to full copytracing as base is not "

652

b"switching to full copytracing as base is not "

649

b"an ancestor of c2\n"

653

b"an ancestor of c2\n"

650

)

654

)

651

return _fullcopytracing(repo, c1, c2, base)

655

return _fullcopytracing(repo, c1, c2, base)

652

656

653

ctx = c2

657

ctx = c2

654

while ctx != base:

658

while ctx != base:

655

if len(ctx.parents()) == 2:

659

if len(ctx.parents()) == 2:

656

# To keep things simple let's not handle merges

660

# To keep things simple let's not handle merges

657

repo.ui.debug(b"switching to full copytracing because of merges\n")

661

repo.ui.debug(b"switching to full copytracing because of merges\n")

658

return _fullcopytracing(repo, c1, c2, base)

662

return _fullcopytracing(repo, c1, c2, base)

659

changedfiles.update(ctx.files())

663

changedfiles.update(ctx.files())

660

ctx = ctx.p1()

664

ctx = ctx.p1()

661

665

662

cp = _forwardcopies(base, c2)

666

cp = _forwardcopies(base, c2)

663

for dst, src in pycompat.iteritems(cp):

667

for dst, src in pycompat.iteritems(cp):

664

if src in m1:

668

if src in m1:

665

copies[dst] = src

669

copies[dst] = src

666

670

667

# file is missing if it isn't present in the destination, but is present in

671

# file is missing if it isn't present in the destination, but is present in

668

# the base and present in the source.

672

# the base and present in the source.

669

# Presence in the base is important to exclude added files, presence in the

673

# Presence in the base is important to exclude added files, presence in the

670

# source is important to exclude removed files.

674

# source is important to exclude removed files.

671

filt = lambda f: f not in m1 and f in base and f in c2

675

filt = lambda f: f not in m1 and f in base and f in c2

672

missingfiles = [f for f in changedfiles if filt(f)]

676

missingfiles = [f for f in changedfiles if filt(f)]

673

677

674

if missingfiles:

678

if missingfiles:

675

basenametofilename = collections.defaultdict(list)

679

basenametofilename = collections.defaultdict(list)

676

dirnametofilename = collections.defaultdict(list)

680

dirnametofilename = collections.defaultdict(list)

677

681

678

for f in m1.filesnotin(base.manifest()):

682

for f in m1.filesnotin(base.manifest()):

679

basename = os.path.basename(f)

683

basename = os.path.basename(f)

680

dirname = os.path.dirname(f)

684

dirname = os.path.dirname(f)

681

basenametofilename[basename].append(f)

685

basenametofilename[basename].append(f)

682

dirnametofilename[dirname].append(f)

686

dirnametofilename[dirname].append(f)

683

687

684

for f in missingfiles:

688

for f in missingfiles:

685

basename = os.path.basename(f)

689

basename = os.path.basename(f)

686

dirname = os.path.dirname(f)

690

dirname = os.path.dirname(f)

687

samebasename = basenametofilename[basename]

691

samebasename = basenametofilename[basename]

688

samedirname = dirnametofilename[dirname]

692

samedirname = dirnametofilename[dirname]

689

movecandidates = samebasename + samedirname

693

movecandidates = samebasename + samedirname

690

# f is guaranteed to be present in c2, that's why

694

# f is guaranteed to be present in c2, that's why

691

# c2.filectx(f) won't fail

695

# c2.filectx(f) won't fail

692

f2 = c2.filectx(f)

696

f2 = c2.filectx(f)

693

# we can have a lot of candidates which can slow down the heuristics

697

# we can have a lot of candidates which can slow down the heuristics

694

# config value to limit the number of candidates moves to check

698

# config value to limit the number of candidates moves to check

695

maxcandidates = repo.ui.configint(

699

maxcandidates = repo.ui.configint(

696

b'experimental', b'copytrace.movecandidateslimit'

700

b'experimental', b'copytrace.movecandidateslimit'

697

)

701

)

698

702

699

if len(movecandidates) > maxcandidates:

703

if len(movecandidates) > maxcandidates:

700

repo.ui.status(

704

repo.ui.status(

701

_(

705

_(

702

b"skipping copytracing for '%s', more "

706

b"skipping copytracing for '%s', more "

703

b"candidates than the limit: %d\n"

707

b"candidates than the limit: %d\n"

704

)

708

)

705

% (f, len(movecandidates))

709

% (f, len(movecandidates))

706

)

710

)

707

continue

711

continue

708

712

709

for candidate in movecandidates:

713

for candidate in movecandidates:

710

f1 = c1.filectx(candidate)

714

f1 = c1.filectx(candidate)

711

if _related(f1, f2):

715

if _related(f1, f2):

712

# if there are a few related copies then we'll merge

716

# if there are a few related copies then we'll merge

713

# changes into all of them. This matches the behaviour

717

# changes into all of them. This matches the behaviour

714

# of upstream copytracing

718

# of upstream copytracing

715

copies[candidate] = f

719

copies[candidate] = f

716

720

717

return copies, {}, {}, {}, {}

721

return copies, {}, {}, {}, {}

718

722

719

723

720

def _related(f1, f2):

724

def _related(f1, f2):

721

"""return True if f1 and f2 filectx have a common ancestor

725

"""return True if f1 and f2 filectx have a common ancestor

722

726

723

Walk back to common ancestor to see if the two files originate

727

Walk back to common ancestor to see if the two files originate

724

from the same file. Since workingfilectx's rev() is None it messes

728

from the same file. Since workingfilectx's rev() is None it messes

725

up the integer comparison logic, hence the pre-step check for

729

up the integer comparison logic, hence the pre-step check for

726

None (f1 and f2 can only be workingfilectx's initially).

730

None (f1 and f2 can only be workingfilectx's initially).

727

"""

731

"""

728

732

729

if f1 == f2:

733

if f1 == f2:

730

return True # a match

734

return True # a match

731

735

732

g1, g2 = f1.ancestors(), f2.ancestors()

736

g1, g2 = f1.ancestors(), f2.ancestors()

733

try:

737

try:

734

f1r, f2r = f1.linkrev(), f2.linkrev()

738

f1r, f2r = f1.linkrev(), f2.linkrev()

735

739

736

if f1r is None:

740

if f1r is None:

737

f1 = next(g1)

741

f1 = next(g1)

738

if f2r is None:

742

if f2r is None:

739

f2 = next(g2)

743

f2 = next(g2)

740

744

741

while True:

745

while True:

742

f1r, f2r = f1.linkrev(), f2.linkrev()

746

f1r, f2r = f1.linkrev(), f2.linkrev()

743

if f1r > f2r:

747

if f1r > f2r:

744

f1 = next(g1)

748

f1 = next(g1)

745

elif f2r > f1r:

749

elif f2r > f1r:

746

f2 = next(g2)

750

f2 = next(g2)

747

else: # f1 and f2 point to files in the same linkrev

751

else: # f1 and f2 point to files in the same linkrev

748

return f1 == f2 # true if they point to the same file

752

return f1 == f2 # true if they point to the same file

749

except StopIteration:

753

except StopIteration:

750

return False

754

return False

751

755

752

756

753

def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):

757

def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):

754

"""reproduce copies from fromrev to rev in the dirstate

758

"""reproduce copies from fromrev to rev in the dirstate

755

759

756

If skiprev is specified, it's a revision that should be used to

760

If skiprev is specified, it's a revision that should be used to

757

filter copy records. Any copies that occur between fromrev and

761

filter copy records. Any copies that occur between fromrev and

758

skiprev will not be duplicated, even if they appear in the set of

762

skiprev will not be duplicated, even if they appear in the set of

759

copies between fromrev and rev.

763

copies between fromrev and rev.

760

"""

764

"""

761

exclude = {}

765

exclude = {}

762

ctraceconfig = repo.ui.config(b'experimental', b'copytrace')

766

ctraceconfig = repo.ui.config(b'experimental', b'copytrace')

763

bctrace = stringutil.parsebool(ctraceconfig)

767

bctrace = stringutil.parsebool(ctraceconfig)

764

if skiprev is not None and (

768

if skiprev is not None and (

765

ctraceconfig == b'heuristics' or bctrace or bctrace is None

769

ctraceconfig == b'heuristics' or bctrace or bctrace is None

766

):

770

):

767

# copytrace='off' skips this line, but not the entire function because

771

# copytrace='off' skips this line, but not the entire function because

768

# the line below is O(size of the repo) during a rebase, while the rest

772

# the line below is O(size of the repo) during a rebase, while the rest

769

# of the function is much faster (and is required for carrying copy

773

# of the function is much faster (and is required for carrying copy

770

# metadata across the rebase anyway).

774

# metadata across the rebase anyway).

771

exclude = pathcopies(repo[fromrev], repo[skiprev])

775

exclude = pathcopies(repo[fromrev], repo[skiprev])

772

for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):

776

for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):

773

if dst in exclude:

777

if dst in exclude:

774

continue

778

continue

775

if dst in wctx:

779

if dst in wctx:

776

wctx[dst].markcopied(src)

780

wctx[dst].markcopied(src)

777

781

778

782

779

def computechangesetfilesadded(ctx):

783

def computechangesetfilesadded(ctx):

780

"""return the list of files added in a changeset

784

"""return the list of files added in a changeset

781

"""

785

"""

782

added = []

786

added = []

783

for f in ctx.files():

787

for f in ctx.files():

784

if not any(f in p for p in ctx.parents()):

788

if not any(f in p for p in ctx.parents()):

785

added.append(f)

789

added.append(f)

786

return added

790

return added

787

791

788

792

789

def computechangesetfilesremoved(ctx):

793

def computechangesetfilesremoved(ctx):

790

"""return the list of files removed in a changeset

794

"""return the list of files removed in a changeset

791

"""

795

"""

792

removed = []

796

removed = []

793

for f in ctx.files():

797

for f in ctx.files():

794

if f not in ctx:

798

if f not in ctx:

795

removed.append(f)

799

removed.append(f)

796

return removed

800

return removed

797

801

798

802

799

def computechangesetcopies(ctx):

803

def computechangesetcopies(ctx):

800

"""return the copies data for a changeset

804

"""return the copies data for a changeset

801

805

802

The copies data are returned as a pair of dictionnary (p1copies, p2copies).

806

The copies data are returned as a pair of dictionnary (p1copies, p2copies).

803

807

804

Each dictionnary are in the form: `{newname: oldname}`

808

Each dictionnary are in the form: `{newname: oldname}`

805

"""

809

"""

806

p1copies = {}

810

p1copies = {}

807

p2copies = {}

811

p2copies = {}

808

p1 = ctx.p1()

812

p1 = ctx.p1()

809

p2 = ctx.p2()

813

p2 = ctx.p2()

810

narrowmatch = ctx._repo.narrowmatch()

814

narrowmatch = ctx._repo.narrowmatch()

811

for dst in ctx.files():

815

for dst in ctx.files():

812

if not narrowmatch(dst) or dst not in ctx:

816

if not narrowmatch(dst) or dst not in ctx:

813

continue

817

continue

814

copied = ctx[dst].renamed()

818

copied = ctx[dst].renamed()

815

if not copied:

819

if not copied:

816

continue

820

continue

817

src, srcnode = copied

821

src, srcnode = copied

818

if src in p1 and p1[src].filenode() == srcnode:

822

if src in p1 and p1[src].filenode() == srcnode:

819

p1copies[dst] = src

823

p1copies[dst] = src

820

elif src in p2 and p2[src].filenode() == srcnode:

824

elif src in p2 and p2[src].filenode() == srcnode:

821

p2copies[dst] = src

825

p2copies[dst] = src

822

return p1copies, p2copies

826

return p1copies, p2copies

823

827

824

828

825

def encodecopies(files, copies):

829

def encodecopies(files, copies):

826

items = []

830

items = []

827

for i, dst in enumerate(files):

831

for i, dst in enumerate(files):

828

if dst in copies:

832

if dst in copies:

829

items.append(b'%d\0%s' % (i, copies[dst]))

833

items.append(b'%d\0%s' % (i, copies[dst]))

830

if len(items) != len(copies):

834

if len(items) != len(copies):

831

raise error.ProgrammingError(

835

raise error.ProgrammingError(

832

b'some copy targets missing from file list'

836

b'some copy targets missing from file list'

833

)

837

)

834

return b"\n".join(items)

838

return b"\n".join(items)

835

839

836

840

837

def decodecopies(files, data):

841

def decodecopies(files, data):

838

try:

842

try:

839

copies = {}

843

copies = {}

840

if not data:

844

if not data:

841

return copies

845

return copies

842

for l in data.split(b'\n'):

846

for l in data.split(b'\n'):

843

strindex, src = l.split(b'\0')

847

strindex, src = l.split(b'\0')

844

i = int(strindex)

848

i = int(strindex)

845

dst = files[i]

849

dst = files[i]

846

copies[dst] = src

850

copies[dst] = src

847

return copies

851

return copies

848

except (ValueError, IndexError):

852

except (ValueError, IndexError):

849

# Perhaps someone had chosen the same key name (e.g. "p1copies") and

853

# Perhaps someone had chosen the same key name (e.g. "p1copies") and

850

# used different syntax for the value.

854

# used different syntax for the value.

851

return None

855

return None

852

856

853

857

854

def encodefileindices(files, subset):

858

def encodefileindices(files, subset):

855

subset = set(subset)

859

subset = set(subset)

856

indices = []

860

indices = []

857

for i, f in enumerate(files):

861

for i, f in enumerate(files):

858

if f in subset:

862

if f in subset:

859

indices.append(b'%d' % i)

863

indices.append(b'%d' % i)

860

return b'\n'.join(indices)

864

return b'\n'.join(indices)

861

865

862

866

863

def decodefileindices(files, data):

867

def decodefileindices(files, data):

864

try:

868

try:

865

subset = []

869

subset = []

866

if not data:

870

if not data:

867

return subset

871

return subset

868

for strindex in data.split(b'\n'):

872

for strindex in data.split(b'\n'):

869

i = int(strindex)

873

i = int(strindex)

870

if i < 0 or i >= len(files):

874

if i < 0 or i >= len(files):

871

return None

875

return None

872

subset.append(files[i])

876

subset.append(files[i])

873

return subset

877

return subset

874

except (ValueError, IndexError):

878

except (ValueError, IndexError):

875

# Perhaps someone had chosen the same key name (e.g. "added") and

879

# Perhaps someone had chosen the same key name (e.g. "added") and

876

# used different syntax for the value.

880

# used different syntax for the value.

877

return None

881

return None

878

882

879

883

880

def _getsidedata(srcrepo, rev):

884

def _getsidedata(srcrepo, rev):

881

ctx = srcrepo[rev]

885

ctx = srcrepo[rev]

882

filescopies = computechangesetcopies(ctx)

886

filescopies = computechangesetcopies(ctx)

883

filesadded = computechangesetfilesadded(ctx)

887

filesadded = computechangesetfilesadded(ctx)

884

filesremoved = computechangesetfilesremoved(ctx)

888

filesremoved = computechangesetfilesremoved(ctx)

885

sidedata = {}

889

sidedata = {}

886

if any([filescopies, filesadded, filesremoved]):

890

if any([filescopies, filesadded, filesremoved]):

887

sortedfiles = sorted(ctx.files())

891

sortedfiles = sorted(ctx.files())

888

p1copies, p2copies = filescopies

892

p1copies, p2copies = filescopies

889

p1copies = encodecopies(sortedfiles, p1copies)

893

p1copies = encodecopies(sortedfiles, p1copies)

890

p2copies = encodecopies(sortedfiles, p2copies)

894

p2copies = encodecopies(sortedfiles, p2copies)

891

filesadded = encodefileindices(sortedfiles, filesadded)

895

filesadded = encodefileindices(sortedfiles, filesadded)

892

filesremoved = encodefileindices(sortedfiles, filesremoved)

896

filesremoved = encodefileindices(sortedfiles, filesremoved)

893

if p1copies:

897

if p1copies:

894

sidedata[sidedatamod.SD_P1COPIES] = p1copies

898

sidedata[sidedatamod.SD_P1COPIES] = p1copies

895

if p2copies:

899

if p2copies:

896

sidedata[sidedatamod.SD_P2COPIES] = p2copies

900

sidedata[sidedatamod.SD_P2COPIES] = p2copies

897

if filesadded:

901

if filesadded:

898

sidedata[sidedatamod.SD_FILESADDED] = filesadded

902

sidedata[sidedatamod.SD_FILESADDED] = filesadded

899

if filesremoved:

903

if filesremoved:

900

sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved

904

sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved

901

return sidedata

905

return sidedata

902

906

903

907

904

def getsidedataadder(srcrepo, destrepo):

908

def getsidedataadder(srcrepo, destrepo):

905

def sidedatacompanion(revlog, rev):

909

def sidedatacompanion(revlog, rev):

906

sidedata = {}

910

sidedata = {}

907

if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog

911

if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog

908

sidedata = _getsidedata(srcrepo, rev)

912

sidedata = _getsidedata(srcrepo, rev)

909

return False, (), sidedata

913

return False, (), sidedata

910

914

911

return sidedatacompanion

915

return sidedatacompanion

912

916

913

917

914

def getsidedataremover(srcrepo, destrepo):

918

def getsidedataremover(srcrepo, destrepo):

915

def sidedatacompanion(revlog, rev):

919

def sidedatacompanion(revlog, rev):

916

f = ()

920

f = ()

917

if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog

921

if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog

918

if revlog.flags(rev) & REVIDX_SIDEDATA:

922

if revlog.flags(rev) & REVIDX_SIDEDATA:

919

f = (

923

f = (

920

sidedatamod.SD_P1COPIES,

924

sidedatamod.SD_P1COPIES,

921

sidedatamod.SD_P2COPIES,

925

sidedatamod.SD_P2COPIES,

922

sidedatamod.SD_FILESADDED,

926

sidedatamod.SD_FILESADDED,

923

sidedatamod.SD_FILESREMOVED,

927

sidedatamod.SD_FILESREMOVED,

924

)

928

)

925

return False, f, {}

929

return False, f, {}

926

930

927

return sidedatacompanion

931

return sidedatacompanion

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import collections
             import heapq
             import os
             from .i18n import _
             from .revlogutils.flagutil import REVIDX_SIDEDATA
             from . import (
                 error,
                 match as matchmod,
                 node,
                 pathutil,
                 pycompat,
                 util,
             )
             from .revlogutils import sidedata as sidedatamod
             from .utils import stringutil
             def _filter(src, dst, t):
                 """filters out invalid copies after chaining"""
                 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
                 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
                 # in the following table (not including trivial cases). For example, case 2
                 # is where a file existed in 'src' and remained under that name in 'mid' and
                 # then was renamed between 'mid' and 'dst'.
                 #
                 # case src mid dst result
                 #   1   x   y   -    -
                 #   2   x   y   y   x->y
                 #   3   x   y   x    -
                 #   4   x   y   z   x->z
                 #   5   -   x   y    -
                 #   6   x   x   y   x->y
                 #
                 # _chain() takes care of chaining the copies in 'a' and 'b', but it
                 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
                 # between 5 and 6, so it includes all cases in its result.
                 # Cases 1, 3, and 5 are then removed by _filter().
                 for k, v in list(t.items()):
                     # remove copies from files that didn't exist
                     if v not in src:
                         del t[k]
                     # remove criss-crossed copies
                     elif k in src and v in dst:
                         del t[k]
                     # remove copies to files that were then removed
                     elif k not in dst:
                         del t[k]
             def _chain(a, b):
                 """chain two sets of copies 'a' and 'b'"""
                 t = a.copy()
                 for k, v in pycompat.iteritems(b):
                     if v in t:
                         t[k] = t[v]
                     else:
                         t[k] = v
                 return t
             def _tracefile(fctx, am, basemf):
                 """return file context that is the ancestor of fctx present in ancestor
                 manifest am
                 Note: we used to try and stop after a given limit, however checking if that
                 limit is reached turned out to be very expensive. we are better off
                 disabling that feature."""
                 for f in fctx.ancestors():
                     path = f.path()
                     if am.get(path, None) == f.filenode():
                         return path
                     if basemf and basemf.get(path, None) == f.filenode():
                         return path
             def _dirstatecopies(repo, match=None):
                 ds = repo.dirstate
                 c = ds.copies().copy()
                 for k in list(c):
                     if ds[k] not in b'anm' or (match and not match(k)):
                         del c[k]
                 return c
             def _computeforwardmissing(a, b, match=None):
                 """Computes which files are in b but not a.
                 This is its own function so extensions can easily wrap this call to see what
                 files _forwardcopies is about to process.
                 """
                 ma = a.manifest()
                 mb = b.manifest()
                 return mb.filesnotin(ma, match=match)
             def usechangesetcentricalgo(repo):
                 """Checks if we should use changeset-centric copy algorithms"""
                 if repo.filecopiesmode == b'changeset-sidedata':
                     return True
                 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
                 changesetsource = (b'changeset-only', b'compatibility')
                 return readfrom in changesetsource
             def _committedforwardcopies(a, b, base, match):
                 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 repo = a._repo
                 if usechangesetcentricalgo(repo):
                     return _changesetforwardcopies(a, b, match)
                 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
                 dbg = repo.ui.debug
                 if debug:
                     dbg(b'debug.copies:    looking into rename from %s to %s\n' % (a, b))
                 am = a.manifest()
                 basemf = None if base is None else base.manifest()
                 # find where new files came from
                 # we currently don't try to find where old files went, too expensive
                 # this means we can miss a case like 'hg rm b; hg cp a b'
                 cm = {}
                 # Computing the forward missing is quite expensive on large manifests, since
                 # it compares the entire manifests. We can optimize it in the common use
                 # case of computing what copies are in a commit versus its parent (like
                 # during a rebase or histedit). Note, we exclude merge commits from this
                 # optimization, since the ctx.files() for a merge commit is not correct for
                 # this comparison.
                 forwardmissingmatch = match
                 if b.p1() == a and b.p2().node() == node.nullid:
                     filesmatcher = matchmod.exact(b.files())
                     forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
                 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
                 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
                 if debug:
                     dbg(b'debug.copies:      missing files to search: %d\n' % len(missing))
                 for f in sorted(missing):
                     if debug:
                         dbg(b'debug.copies:        tracing file: %s\n' % f)
                     fctx = b[f]
                     fctx._ancestrycontext = ancestrycontext
                     if debug:
                         start = util.timer()
                     opath = _tracefile(fctx, am, basemf)
                     if opath:
                         if debug:
                             dbg(b'debug.copies:          rename of: %s\n' % opath)
                         cm[f] = opath
                     if debug:
                         dbg(
                             b'debug.copies:          time: %f seconds\n'
                             % (util.timer() - start)
                         )
                 return cm
             def _changesetforwardcopies(a, b, match):
                 if a.rev() in (node.nullrev, b.rev()):
                     return {}
                 repo = a.repo()
                 children = {}
                 cl = repo.changelog
                 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
                 for r in missingrevs:
                     for p in cl.parentrevs(r):
                         if p == node.nullrev:
                             continue
                         if p not in children:
                             children[p] = [r]
                         else:
                             children[p].append(r)
                 roots = set(children) - set(missingrevs)
-                # 'work' contains 3-tuples of a (revision number, parent number, copies).
+                work = list(roots)
-                # The parent number is only used for knowing which parent the copies dict
+                all_copies = {r: {} for r in roots}
-                # came from.
-                # NOTE: To reduce costly copying the 'copies' dicts, we reuse the same
-                # instance for *one* of the child nodes (the last one). Once an instance
-                # has been put on the queue, it is thus no longer safe to modify it.
-                # Conversely, it *is* safe to modify an instance popped off the queue.
-                work = [(r, 1, {}) for r in roots]
                 heapq.heapify(work)
                 alwaysmatch = match.always()
                 while work:
-                    r, i1, copies = heapq.heappop(work)
+                    r = heapq.heappop(work)
-                    if work and work[0][0] == r:
+                    copies = all_copies.pop(r)
-                        # We are tracing copies from both parents
-                        r, i2, copies2 = heapq.heappop(work)
-                        for dst, src in copies2.items():
-                            # Unlike when copies are stored in the filelog, we consider
-                            # it a copy even if the destination already existed on the
-                            # other branch. It's simply too expensive to check if the
-                            # file existed in the manifest.
-                            if dst not in copies:
-                                # If it was copied on the p1 side, leave it as copied from
-                                # that side, even if it was also copied on the p2 side.
-                                copies[dst] = copies2[dst]
                     if r == b.rev():
                         return copies
                     for i, c in enumerate(children[r]):
                         childctx = repo[c]
                         if r == childctx.p1().rev():
                             parent = 1
                             childcopies = childctx.p1copies()
                         else:
                             assert r == childctx.p2().rev()
                             parent = 2
                             childcopies = childctx.p2copies()
                         if not alwaysmatch:
                             childcopies = {
                                 dst: src for dst, src in childcopies.items() if match(dst)
                             }
                         # Copy the dict only if later iterations will also need it
                         if i != len(children[r]) - 1:
                             newcopies = copies.copy()
                         else:
                             newcopies = copies
                         if childcopies:
                             newcopies = _chain(newcopies, childcopies)
                         for f in childctx.filesremoved():
                             if f in newcopies:
                                 del newcopies[f]
-                        heapq.heappush(work, (c, parent, newcopies))
+                        othercopies = all_copies.get(c)
+                        if othercopies is None:
+                            heapq.heappush(work, c)
+                            all_copies[c] = newcopies
+                        else:
+                            # we are the second parent to work on c, we need to merge our
+                            # work with the other.
+                            #
+                            # Unlike when copies are stored in the filelog, we consider
+                            # it a copy even if the destination already existed on the
+                            # other branch. It's simply too expensive to check if the
+                            # file existed in the manifest.
+                            #
+                            # In case of conflict, parent 1 take precedence over parent 2.
+                            # This is an arbitrary choice made anew when implementing
+                            # changeset based copies. It was made without regards with
+                            # potential filelog related behavior.
+                            if parent == 1:
+                                othercopies.update(newcopies)
+                            else:
+                                newcopies.update(othercopies)
+                                all_copies[c] = newcopies
                 assert False
             def _forwardcopies(a, b, base=None, match=None):
                 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
                 if base is None:
                     base = a
                 match = a.repo().narrowmatch(match)
                 # check for working copy
                 if b.rev() is None:
                     cm = _committedforwardcopies(a, b.p1(), base, match)
                     # combine copies from dirstate if necessary
                     copies = _chain(cm, _dirstatecopies(b._repo, match))
                 else:
                     copies = _committedforwardcopies(a, b, base, match)
                 return copies
             def _backwardrenames(a, b, match):
                 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
                     return {}
                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
                 # We don't want to pass in "match" here, since that would filter
                 # the destination by it. Since we're reversing the copies, we want
                 # to filter the source instead.
                 f = _forwardcopies(b, a)
                 r = {}
                 for k, v in sorted(pycompat.iteritems(f)):
                     if match and not match(v):
                         continue
                     # remove copies
                     if v in a:
                         continue
                     r[v] = k
                 return r
             def pathcopies(x, y, match=None):
                 """find {dst@y: src@x} copy mapping for directed compare"""
                 repo = x._repo
                 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
                 if debug:
                     repo.ui.debug(
                         b'debug.copies: searching copies from %s to %s\n' % (x, y)
                     )
                 if x == y or not x or not y:
                     return {}
                 a = y.ancestor(x)
                 if a == x:
                     if debug:
                         repo.ui.debug(b'debug.copies: search mode: forward\n')
                     if y.rev() is None and x == y.p1():
                         # short-circuit to avoid issues with merge states
                         return _dirstatecopies(repo, match)
                     copies = _forwardcopies(x, y, match=match)
                 elif a == y:
                     if debug:
                         repo.ui.debug(b'debug.copies: search mode: backward\n')
                     copies = _backwardrenames(x, y, match=match)
                 else:
                     if debug:
                         repo.ui.debug(b'debug.copies: search mode: combined\n')
                     base = None
                     if a.rev() != node.nullrev:
                         base = x
                     copies = _chain(
                         _backwardrenames(x, a, match=match),
                         _forwardcopies(a, y, base, match=match),
                     )
                 _filter(x, y, copies)
                 return copies
             def mergecopies(repo, c1, c2, base):
                 """
                 Finds moves and copies between context c1 and c2 that are relevant for
                 merging. 'base' will be used as the merge base.
                 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
                 files that were moved/ copied in one merge parent and modified in another.
                 For example:
                 o          ---> 4 another commit
                 |
                 |   o      ---> 3 commit that modifies a.txt
                 |  /
                 o /        ---> 2 commit that moves a.txt to b.txt
                 |/
                 o          ---> 1 merge base
                 If we try to rebase revision 3 on revision 4, since there is no a.txt in
                 revision 4, and if user have copytrace disabled, we prints the following
                 message:
                 ```other changed <file> which local deleted```
                 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
                 "dirmove".
                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.
                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.
                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.
                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.
                 "dirmove" is a mapping of detected source dir -> destination dir renames.
                 This is needed for handling changes to new files previously grafted into
                 renamed directories.
                 This function calls different copytracing algorithms based on config.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return {}, {}, {}, {}, {}
                 narrowmatch = c1.repo().narrowmatch()
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
                 copytracing = repo.ui.config(b'experimental', b'copytrace')
                 if stringutil.parsebool(copytracing) is False:
                     # stringutil.parsebool() returns None when it is unable to parse the
                     # value, so we should rely on making sure copytracing is on such cases
                     return {}, {}, {}, {}, {}
                 if usechangesetcentricalgo(repo):
                     # The heuristics don't make sense when we need changeset-centric algos
                     return _fullcopytracing(repo, c1, c2, base)
                 # Copy trace disabling is explicitly below the node == p1 logic above
                 # because the logic above is required for a simple copy to be kept across a
                 # rebase.
                 if copytracing == b'heuristics':
                     # Do full copytracing if only non-public revisions are involved as
                     # that will be fast enough and will also cover the copies which could
                     # be missed by heuristics
                     if _isfullcopytraceable(repo, c1, base):
                         return _fullcopytracing(repo, c1, c2, base)
                     return _heuristicscopytracing(repo, c1, c2, base)
                 else:
                     return _fullcopytracing(repo, c1, c2, base)
             def _isfullcopytraceable(repo, c1, base):
                 """ Checks that if base, source and destination are all no-public branches,
                 if yes let's use the full copytrace algorithm for increased capabilities
                 since it will be fast enough.
                 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
                 number of changesets from c1 to base such that if number of changesets are
                 more than the limit, full copytracing algorithm won't be used.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c1.mutable() and base.mutable():
                     sourcecommitlimit = repo.ui.configint(
                         b'experimental', b'copytrace.sourcecommitlimit'
                     )
                     commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
                     return commits < sourcecommitlimit
                 return False
             def _checksinglesidecopies(
                 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
             ):
                 if src not in m2:
                     # deleted on side 2
                     if src not in m1:
                         # renamed on side 1, deleted on side 2
                         renamedelete[src] = dsts1
                 elif m2[src] != mb[src]:
                     if not _related(c2[src], base[src]):
                         return
                     # modified on side 2
                     for dst in dsts1:
                         if dst not in m2:
                             # dst not added on side 2 (handle as regular
                             # "both created" case in manifestmerge otherwise)
                             copy[dst] = src
             def _fullcopytracing(repo, c1, c2, base):
                 """ The full copytracing algorithm which finds all the new files that were
                 added from merge base up to the top commit and for each file it checks if
                 this file was copied from another file.
                 This is pretty slow when a lot of changesets are involved but will track all
                 the copies.
                 """
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 mb = base.manifest()
                 copies1 = pathcopies(base, c1)
                 copies2 = pathcopies(base, c2)
                 inversecopies1 = {}
                 inversecopies2 = {}
                 for dst, src in copies1.items():
                     inversecopies1.setdefault(src, []).append(dst)
                 for dst, src in copies2.items():
                     inversecopies2.setdefault(src, []).append(dst)
                 copy = {}
                 diverge = {}
                 renamedelete = {}
                 allsources = set(inversecopies1) | set(inversecopies2)
                 for src in allsources:
                     dsts1 = inversecopies1.get(src)
                     dsts2 = inversecopies2.get(src)
                     if dsts1 and dsts2:
                         # copied/renamed on both sides
                         if src not in m1 and src not in m2:
                             # renamed on both sides
                             dsts1 = set(dsts1)
                             dsts2 = set(dsts2)
                             # If there's some overlap in the rename destinations, we
                             # consider it not divergent. For example, if side 1 copies 'a'
                             # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                             # and 'd' and deletes 'a'.
                             if dsts1 & dsts2:
                                 for dst in dsts1 & dsts2:
                                     copy[dst] = src
                             else:
                                 diverge[src] = sorted(dsts1 | dsts2)
                         elif src in m1 and src in m2:
                             # copied on both sides
                             dsts1 = set(dsts1)
                             dsts2 = set(dsts2)
                             for dst in dsts1 & dsts2:
                                 copy[dst] = src
                         # TODO: Handle cases where it was renamed on one side and copied
                         # on the other side
                     elif dsts1:
                         # copied/renamed only on side 1
                         _checksinglesidecopies(
                             src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
                         )
                     elif dsts2:
                         # copied/renamed only on side 2
                         _checksinglesidecopies(
                             src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
                         )
                 renamedeleteset = set()
                 divergeset = set()
                 for dsts in diverge.values():
                     divergeset.update(dsts)
                 for dsts in renamedelete.values():
                     renamedeleteset.update(dsts)
                 # find interesting file sets from manifests
                 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
                 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
                 u1 = sorted(addedinm1 - addedinm2)
                 u2 = sorted(addedinm2 - addedinm1)
                 header = b"  unmatched files in %s"
                 if u1:
                     repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
                 if u2:
                     repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))
                 fullcopy = copies1.copy()
                 fullcopy.update(copies2)
                 if not fullcopy:
                     return copy, {}, diverge, renamedelete, {}
                 if repo.ui.debugflag:
                     repo.ui.debug(
                         b"  all copies found (* = to merge, ! = divergent, "
                         b"% = renamed and deleted):\n"
                     )
                     for f in sorted(fullcopy):
                         note = b""
                         if f in copy:
                             note += b"*"
                         if f in divergeset:
                             note += b"!"
                         if f in renamedeleteset:
                             note += b"%"
                         repo.ui.debug(
                             b"   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
                         )
                 del divergeset
                 repo.ui.debug(b"  checking for directory renames\n")
                 # generate a directory move map
                 d1, d2 = c1.dirs(), c2.dirs()
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in pycompat.iteritems(fullcopy):
                     dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif dsrc in d1 and ddst in d1:
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc)
                     elif dsrc in d2 and ddst in d2:
                         # directory wasn't entirely moved remotely
                         invalid.add(dsrc)
                     elif dsrc in dirmove and dirmove[dsrc] != ddst:
                         # files from the same directory moved to two different places
                         invalid.add(dsrc)
                     else:
                         # looks good so far
                         dirmove[dsrc] = ddst
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del d1, d2, invalid
                 if not dirmove:
                     return copy, {}, diverge, renamedelete, {}
                 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
                 for d in dirmove:
                     repo.ui.debug(
                         b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
                     )
                 movewithdir = {}
                 # check unaccounted nonoverlapping files against directory moves
                 for f in u1 + u2:
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d) :]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(
                                         b"   pending file src: '%s' -> dst: '%s'\n"
                                         % (f, df)
                                     )
                                 break
                 return copy, movewithdir, diverge, renamedelete, dirmove
             def _heuristicscopytracing(repo, c1, c2, base):
                 """ Fast copytracing using filename heuristics
                 Assumes that moves or renames are of following two types:
 ) Inside a directory only (same directory name but different filenames)
 ) Move from one directory to another
                                 (same filenames but different directory names)
                 Works only when there are no merge commits in the "source branch".
                 Source branch is commits from base up to c2 not including base.
                 If merge is involved it fallbacks to _fullcopytracing().
                 Can be used by setting the following config:
                     [experimental]
                     copytrace = heuristics
                 In some cases the copy/move candidates found by heuristics can be very large
                 in number and that will make the algorithm slow. The number of possible
                 candidates to check can be limited by using the config
                 `experimental.copytrace.movecandidateslimit` which defaults to 100.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c2.rev() is None:
                     c2 = c2.p1()
                 copies = {}
                 changedfiles = set()
                 m1 = c1.manifest()
                 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
                     # If base is not in c2 branch, we switch to fullcopytracing
                     repo.ui.debug(
                         b"switching to full copytracing as base is not "
                         b"an ancestor of c2\n"
                     )
                     return _fullcopytracing(repo, c1, c2, base)
                 ctx = c2
                 while ctx != base:
                     if len(ctx.parents()) == 2:
                         # To keep things simple let's not handle merges
                         repo.ui.debug(b"switching to full copytracing because of merges\n")
                         return _fullcopytracing(repo, c1, c2, base)
                     changedfiles.update(ctx.files())
                     ctx = ctx.p1()
                 cp = _forwardcopies(base, c2)
                 for dst, src in pycompat.iteritems(cp):
                     if src in m1:
                         copies[dst] = src
                 # file is missing if it isn't present in the destination, but is present in
                 # the base and present in the source.
                 # Presence in the base is important to exclude added files, presence in the
                 # source is important to exclude removed files.
                 filt = lambda f: f not in m1 and f in base and f in c2
                 missingfiles = [f for f in changedfiles if filt(f)]
                 if missingfiles:
                     basenametofilename = collections.defaultdict(list)
                     dirnametofilename = collections.defaultdict(list)
                     for f in m1.filesnotin(base.manifest()):
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         basenametofilename[basename].append(f)
                         dirnametofilename[dirname].append(f)
                     for f in missingfiles:
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         samebasename = basenametofilename[basename]
                         samedirname = dirnametofilename[dirname]
                         movecandidates = samebasename + samedirname
                         # f is guaranteed to be present in c2, that's why
                         # c2.filectx(f) won't fail
                         f2 = c2.filectx(f)
                         # we can have a lot of candidates which can slow down the heuristics
                         # config value to limit the number of candidates moves to check
                         maxcandidates = repo.ui.configint(
                             b'experimental', b'copytrace.movecandidateslimit'
                         )
                         if len(movecandidates) > maxcandidates:
                             repo.ui.status(
                                 _(
                                     b"skipping copytracing for '%s', more "
                                     b"candidates than the limit: %d\n"
                                 )
                                 % (f, len(movecandidates))
                             )
                             continue
                         for candidate in movecandidates:
                             f1 = c1.filectx(candidate)
                             if _related(f1, f2):
                                 # if there are a few related copies then we'll merge
                                 # changes into all of them. This matches the behaviour
                                 # of upstream copytracing
                                 copies[candidate] = f
                 return copies, {}, {}, {}, {}
             def _related(f1, f2):
                 """return True if f1 and f2 filectx have a common ancestor
                 Walk back to common ancestor to see if the two files originate
                 from the same file. Since workingfilectx's rev() is None it messes
                 up the integer comparison logic, hence the pre-step check for
                 None (f1 and f2 can only be workingfilectx's initially).
                 """
                 if f1 == f2:
                     return True  # a match
                 g1, g2 = f1.ancestors(), f2.ancestors()
                 try:
                     f1r, f2r = f1.linkrev(), f2.linkrev()
                     if f1r is None:
                         f1 = next(g1)
                     if f2r is None:
                         f2 = next(g2)
                     while True:
                         f1r, f2r = f1.linkrev(), f2.linkrev()
                         if f1r > f2r:
                             f1 = next(g1)
                         elif f2r > f1r:
                             f2 = next(g2)
                         else:  # f1 and f2 point to files in the same linkrev
                             return f1 == f2  # true if they point to the same file
                 except StopIteration:
                     return False
             def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
                 """reproduce copies from fromrev to rev in the dirstate
                 If skiprev is specified, it's a revision that should be used to
                 filter copy records. Any copies that occur between fromrev and
                 skiprev will not be duplicated, even if they appear in the set of
                 copies between fromrev and rev.
                 """
                 exclude = {}
                 ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
                 bctrace = stringutil.parsebool(ctraceconfig)
                 if skiprev is not None and (
                     ctraceconfig == b'heuristics' or bctrace or bctrace is None
                 ):
                     # copytrace='off' skips this line, but not the entire function because
                     # the line below is O(size of the repo) during a rebase, while the rest
                     # of the function is much faster (and is required for carrying copy
                     # metadata across the rebase anyway).
                     exclude = pathcopies(repo[fromrev], repo[skiprev])
                 for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
                     if dst in exclude:
                         continue
                     if dst in wctx:
                         wctx[dst].markcopied(src)
             def computechangesetfilesadded(ctx):
                 """return the list of files added in a changeset
                 """
                 added = []
                 for f in ctx.files():
                     if not any(f in p for p in ctx.parents()):
                         added.append(f)
                 return added
             def computechangesetfilesremoved(ctx):
                 """return the list of files removed in a changeset
                 """
                 removed = []
                 for f in ctx.files():
                     if f not in ctx:
                         removed.append(f)
                 return removed
             def computechangesetcopies(ctx):
                 """return the copies data for a changeset
                 The copies data are returned as a pair of dictionnary (p1copies, p2copies).
                 Each dictionnary are in the form: `{newname: oldname}`
                 """
                 p1copies = {}
                 p2copies = {}
                 p1 = ctx.p1()
                 p2 = ctx.p2()
                 narrowmatch = ctx._repo.narrowmatch()
                 for dst in ctx.files():
                     if not narrowmatch(dst) or dst not in ctx:
                         continue
                     copied = ctx[dst].renamed()
                     if not copied:
                         continue
                     src, srcnode = copied
                     if src in p1 and p1[src].filenode() == srcnode:
                         p1copies[dst] = src
                     elif src in p2 and p2[src].filenode() == srcnode:
                         p2copies[dst] = src
                 return p1copies, p2copies
             def encodecopies(files, copies):
                 items = []
                 for i, dst in enumerate(files):
                     if dst in copies:
                         items.append(b'%d\0%s' % (i, copies[dst]))
                 if len(items) != len(copies):
                     raise error.ProgrammingError(
                         b'some copy targets missing from file list'
                     )
                 return b"\n".join(items)
             def decodecopies(files, data):
                 try:
                     copies = {}
                     if not data:
                         return copies
                     for l in data.split(b'\n'):
                         strindex, src = l.split(b'\0')
                         i = int(strindex)
                         dst = files[i]
                         copies[dst] = src
                     return copies
                 except (ValueError, IndexError):
                     # Perhaps someone had chosen the same key name (e.g. "p1copies") and
                     # used different syntax for the value.
                     return None
             def encodefileindices(files, subset):
                 subset = set(subset)
                 indices = []
                 for i, f in enumerate(files):
                     if f in subset:
                         indices.append(b'%d' % i)
                 return b'\n'.join(indices)
             def decodefileindices(files, data):
                 try:
                     subset = []
                     if not data:
                         return subset
                     for strindex in data.split(b'\n'):
                         i = int(strindex)
                         if i < 0 or i >= len(files):
                             return None
                         subset.append(files[i])
                     return subset
                 except (ValueError, IndexError):
                     # Perhaps someone had chosen the same key name (e.g. "added") and
                     # used different syntax for the value.
                     return None
             def _getsidedata(srcrepo, rev):
                 ctx = srcrepo[rev]
                 filescopies = computechangesetcopies(ctx)
                 filesadded = computechangesetfilesadded(ctx)
                 filesremoved = computechangesetfilesremoved(ctx)
                 sidedata = {}
                 if any([filescopies, filesadded, filesremoved]):
                     sortedfiles = sorted(ctx.files())
                     p1copies, p2copies = filescopies
                     p1copies = encodecopies(sortedfiles, p1copies)
                     p2copies = encodecopies(sortedfiles, p2copies)
                     filesadded = encodefileindices(sortedfiles, filesadded)
                     filesremoved = encodefileindices(sortedfiles, filesremoved)
                     if p1copies:
                         sidedata[sidedatamod.SD_P1COPIES] = p1copies
                     if p2copies:
                         sidedata[sidedatamod.SD_P2COPIES] = p2copies
                     if filesadded:
                         sidedata[sidedatamod.SD_FILESADDED] = filesadded
                     if filesremoved:
                         sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
                 return sidedata
             def getsidedataadder(srcrepo, destrepo):
                 def sidedatacompanion(revlog, rev):
                     sidedata = {}
                     if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
                         sidedata = _getsidedata(srcrepo, rev)
                     return False, (), sidedata
                 return sidedatacompanion
             def getsidedataremover(srcrepo, destrepo):
                 def sidedatacompanion(revlog, rev):
                     f = ()
                     if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
                         if revlog.flags(rev) & REVIDX_SIDEDATA:
                             f = (
                                 sidedatamod.SD_P1COPIES,
                                 sidedatamod.SD_P2COPIES,
                                 sidedatamod.SD_FILESADDED,
                                 sidedatamod.SD_FILESREMOVED,
                             )
                     return False, f, {}
                 return sidedatacompanion