##// END OF EJS Templates
copies: avoid instancing more changectx to access parent revisions...
marmoute -
r43548:181d28ba default
parent child Browse files
Show More
@@ -1,932 +1,933 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16
16
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
18
18
19 from . import (
19 from . import (
20 error,
20 error,
21 match as matchmod,
21 match as matchmod,
22 node,
22 node,
23 pathutil,
23 pathutil,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28 from .revlogutils import sidedata as sidedatamod
28 from .revlogutils import sidedata as sidedatamod
29
29
30 from .utils import stringutil
30 from .utils import stringutil
31
31
32
32
33 def _filter(src, dst, t):
33 def _filter(src, dst, t):
34 """filters out invalid copies after chaining"""
34 """filters out invalid copies after chaining"""
35
35
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 # in the following table (not including trivial cases). For example, case 2
38 # in the following table (not including trivial cases). For example, case 2
39 # is where a file existed in 'src' and remained under that name in 'mid' and
39 # is where a file existed in 'src' and remained under that name in 'mid' and
40 # then was renamed between 'mid' and 'dst'.
40 # then was renamed between 'mid' and 'dst'.
41 #
41 #
42 # case src mid dst result
42 # case src mid dst result
43 # 1 x y - -
43 # 1 x y - -
44 # 2 x y y x->y
44 # 2 x y y x->y
45 # 3 x y x -
45 # 3 x y x -
46 # 4 x y z x->z
46 # 4 x y z x->z
47 # 5 - x y -
47 # 5 - x y -
48 # 6 x x y x->y
48 # 6 x x y x->y
49 #
49 #
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 # between 5 and 6, so it includes all cases in its result.
52 # between 5 and 6, so it includes all cases in its result.
53 # Cases 1, 3, and 5 are then removed by _filter().
53 # Cases 1, 3, and 5 are then removed by _filter().
54
54
55 for k, v in list(t.items()):
55 for k, v in list(t.items()):
56 # remove copies from files that didn't exist
56 # remove copies from files that didn't exist
57 if v not in src:
57 if v not in src:
58 del t[k]
58 del t[k]
59 # remove criss-crossed copies
59 # remove criss-crossed copies
60 elif k in src and v in dst:
60 elif k in src and v in dst:
61 del t[k]
61 del t[k]
62 # remove copies to files that were then removed
62 # remove copies to files that were then removed
63 elif k not in dst:
63 elif k not in dst:
64 del t[k]
64 del t[k]
65
65
66
66
67 def _chain(a, b):
67 def _chain(a, b):
68 """chain two sets of copies 'a' and 'b'"""
68 """chain two sets of copies 'a' and 'b'"""
69 t = a.copy()
69 t = a.copy()
70 for k, v in pycompat.iteritems(b):
70 for k, v in pycompat.iteritems(b):
71 if v in t:
71 if v in t:
72 t[k] = t[v]
72 t[k] = t[v]
73 else:
73 else:
74 t[k] = v
74 t[k] = v
75 return t
75 return t
76
76
77
77
78 def _tracefile(fctx, am, basemf):
78 def _tracefile(fctx, am, basemf):
79 """return file context that is the ancestor of fctx present in ancestor
79 """return file context that is the ancestor of fctx present in ancestor
80 manifest am
80 manifest am
81
81
82 Note: we used to try and stop after a given limit, however checking if that
82 Note: we used to try and stop after a given limit, however checking if that
83 limit is reached turned out to be very expensive. we are better off
83 limit is reached turned out to be very expensive. We are better off
83 limit is reached turned out to be very expensive. We are better off
84 disabling that feature."""
85
85
86 for f in fctx.ancestors():
86 for f in fctx.ancestors():
87 path = f.path()
87 path = f.path()
88 if am.get(path, None) == f.filenode():
88 if am.get(path, None) == f.filenode():
89 return path
89 return path
90 if basemf and basemf.get(path, None) == f.filenode():
90 if basemf and basemf.get(path, None) == f.filenode():
91 return path
91 return path
92
92
93
93
94 def _dirstatecopies(repo, match=None):
94 def _dirstatecopies(repo, match=None):
95 ds = repo.dirstate
95 ds = repo.dirstate
96 c = ds.copies().copy()
96 c = ds.copies().copy()
97 for k in list(c):
97 for k in list(c):
98 if ds[k] not in b'anm' or (match and not match(k)):
98 if ds[k] not in b'anm' or (match and not match(k)):
99 del c[k]
99 del c[k]
100 return c
100 return c
101
101
102
102
103 def _computeforwardmissing(a, b, match=None):
103 def _computeforwardmissing(a, b, match=None):
104 """Computes which files are in b but not a.
104 """Computes which files are in b but not a.
105 This is its own function so extensions can easily wrap this call to see what
105 This is its own function so extensions can easily wrap this call to see what
106 files _forwardcopies is about to process.
106 files _forwardcopies is about to process.
107 """
107 """
108 ma = a.manifest()
108 ma = a.manifest()
109 mb = b.manifest()
109 mb = b.manifest()
110 return mb.filesnotin(ma, match=match)
110 return mb.filesnotin(ma, match=match)
111
111
112
112
113 def usechangesetcentricalgo(repo):
113 def usechangesetcentricalgo(repo):
114 """Checks if we should use changeset-centric copy algorithms"""
114 """Checks if we should use changeset-centric copy algorithms"""
115 if repo.filecopiesmode == b'changeset-sidedata':
115 if repo.filecopiesmode == b'changeset-sidedata':
116 return True
116 return True
117 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
117 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
118 changesetsource = (b'changeset-only', b'compatibility')
118 changesetsource = (b'changeset-only', b'compatibility')
119 return readfrom in changesetsource
119 return readfrom in changesetsource
120
120
121
121
122 def _committedforwardcopies(a, b, base, match):
122 def _committedforwardcopies(a, b, base, match):
123 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
123 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
124 # files might have to be traced back to the fctx parent of the last
124 # files might have to be traced back to the fctx parent of the last
125 # one-side-only changeset, but not further back than that
125 # one-side-only changeset, but not further back than that
126 repo = a._repo
126 repo = a._repo
127
127
128 if usechangesetcentricalgo(repo):
128 if usechangesetcentricalgo(repo):
129 return _changesetforwardcopies(a, b, match)
129 return _changesetforwardcopies(a, b, match)
130
130
131 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
131 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
132 dbg = repo.ui.debug
132 dbg = repo.ui.debug
133 if debug:
133 if debug:
134 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
134 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
135 am = a.manifest()
135 am = a.manifest()
136 basemf = None if base is None else base.manifest()
136 basemf = None if base is None else base.manifest()
137
137
138 # find where new files came from
138 # find where new files came from
139 # we currently don't try to find where old files went, too expensive
139 # we currently don't try to find where old files went, too expensive
140 # this means we can miss a case like 'hg rm b; hg cp a b'
140 # this means we can miss a case like 'hg rm b; hg cp a b'
141 cm = {}
141 cm = {}
142
142
143 # Computing the forward missing is quite expensive on large manifests, since
143 # Computing the forward missing is quite expensive on large manifests, since
144 # it compares the entire manifests. We can optimize it in the common use
144 # it compares the entire manifests. We can optimize it in the common use
145 # case of computing what copies are in a commit versus its parent (like
145 # case of computing what copies are in a commit versus its parent (like
146 # during a rebase or histedit). Note, we exclude merge commits from this
146 # during a rebase or histedit). Note, we exclude merge commits from this
147 # optimization, since the ctx.files() for a merge commit is not correct for
147 # optimization, since the ctx.files() for a merge commit is not correct for
148 # this comparison.
148 # this comparison.
149 forwardmissingmatch = match
149 forwardmissingmatch = match
150 if b.p1() == a and b.p2().node() == node.nullid:
150 if b.p1() == a and b.p2().node() == node.nullid:
151 filesmatcher = matchmod.exact(b.files())
151 filesmatcher = matchmod.exact(b.files())
152 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
152 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
153 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
153 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
154
154
155 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
155 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
156
156
157 if debug:
157 if debug:
158 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
158 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
159
159
160 for f in sorted(missing):
160 for f in sorted(missing):
161 if debug:
161 if debug:
162 dbg(b'debug.copies: tracing file: %s\n' % f)
162 dbg(b'debug.copies: tracing file: %s\n' % f)
163 fctx = b[f]
163 fctx = b[f]
164 fctx._ancestrycontext = ancestrycontext
164 fctx._ancestrycontext = ancestrycontext
165
165
166 if debug:
166 if debug:
167 start = util.timer()
167 start = util.timer()
168 opath = _tracefile(fctx, am, basemf)
168 opath = _tracefile(fctx, am, basemf)
169 if opath:
169 if opath:
170 if debug:
170 if debug:
171 dbg(b'debug.copies: rename of: %s\n' % opath)
171 dbg(b'debug.copies: rename of: %s\n' % opath)
172 cm[f] = opath
172 cm[f] = opath
173 if debug:
173 if debug:
174 dbg(
174 dbg(
175 b'debug.copies: time: %f seconds\n'
175 b'debug.copies: time: %f seconds\n'
176 % (util.timer() - start)
176 % (util.timer() - start)
177 )
177 )
178 return cm
178 return cm
179
179
180
180
181 def _changesetforwardcopies(a, b, match):
181 def _changesetforwardcopies(a, b, match):
182 if a.rev() in (node.nullrev, b.rev()):
182 if a.rev() in (node.nullrev, b.rev()):
183 return {}
183 return {}
184
184
185 repo = a.repo()
185 repo = a.repo()
186 children = {}
186 children = {}
187 cl = repo.changelog
187 cl = repo.changelog
188 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
188 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
189 for r in missingrevs:
189 for r in missingrevs:
190 for p in cl.parentrevs(r):
190 for p in cl.parentrevs(r):
191 if p == node.nullrev:
191 if p == node.nullrev:
192 continue
192 continue
193 if p not in children:
193 if p not in children:
194 children[p] = [r]
194 children[p] = [r]
195 else:
195 else:
196 children[p].append(r)
196 children[p].append(r)
197
197
198 roots = set(children) - set(missingrevs)
198 roots = set(children) - set(missingrevs)
199 work = list(roots)
199 work = list(roots)
200 all_copies = {r: {} for r in roots}
200 all_copies = {r: {} for r in roots}
201 heapq.heapify(work)
201 heapq.heapify(work)
202 alwaysmatch = match.always()
202 alwaysmatch = match.always()
203 while work:
203 while work:
204 r = heapq.heappop(work)
204 r = heapq.heappop(work)
205 copies = all_copies.pop(r)
205 copies = all_copies.pop(r)
206 if r == b.rev():
206 if r == b.rev():
207 return copies
207 return copies
208 for i, c in enumerate(children[r]):
208 for i, c in enumerate(children[r]):
209 childctx = repo[c]
209 childctx = repo[c]
210 p1, p2 = cl.parentrevs(c)
210 p1copies, p2copies = childctx._copies
211 p1copies, p2copies = childctx._copies
211 if r == childctx.p1().rev():
212 if r == p1:
212 parent = 1
213 parent = 1
213 childcopies = p1copies
214 childcopies = p1copies
214 else:
215 else:
215 assert r == childctx.p2().rev()
216 assert r == p2
216 parent = 2
217 parent = 2
217 childcopies = p2copies
218 childcopies = p2copies
218 if not alwaysmatch:
219 if not alwaysmatch:
219 childcopies = {
220 childcopies = {
220 dst: src for dst, src in childcopies.items() if match(dst)
221 dst: src for dst, src in childcopies.items() if match(dst)
221 }
222 }
222 # Copy the dict only if later iterations will also need it
223 # Copy the dict only if later iterations will also need it
223 if i != len(children[r]) - 1:
224 if i != len(children[r]) - 1:
224 newcopies = copies.copy()
225 newcopies = copies.copy()
225 else:
226 else:
226 newcopies = copies
227 newcopies = copies
227 if childcopies:
228 if childcopies:
228 newcopies = _chain(newcopies, childcopies)
229 newcopies = _chain(newcopies, childcopies)
229 for f in childctx.filesremoved():
230 for f in childctx.filesremoved():
230 if f in newcopies:
231 if f in newcopies:
231 del newcopies[f]
232 del newcopies[f]
232 othercopies = all_copies.get(c)
233 othercopies = all_copies.get(c)
233 if othercopies is None:
234 if othercopies is None:
234 heapq.heappush(work, c)
235 heapq.heappush(work, c)
235 all_copies[c] = newcopies
236 all_copies[c] = newcopies
236 else:
237 else:
237 # we are the second parent to work on c, we need to merge our
238 # we are the second parent to work on c, we need to merge our
238 # work with the other.
239 # work with the other.
239 #
240 #
240 # Unlike when copies are stored in the filelog, we consider
241 # Unlike when copies are stored in the filelog, we consider
241 # it a copy even if the destination already existed on the
242 # it a copy even if the destination already existed on the
242 # other branch. It's simply too expensive to check if the
243 # other branch. It's simply too expensive to check if the
243 # file existed in the manifest.
244 # file existed in the manifest.
244 #
245 #
245 # In case of conflict, parent 1 takes precedence over parent 2.
246 # In case of conflict, parent 1 takes precedence over parent 2.
246 # This is an arbitrary choice made anew when implementing
247 # This is an arbitrary choice made anew when implementing
247 # changeset based copies. It was made without regard to
248 # changeset based copies. It was made without regard to
248 # potential filelog related behavior.
249 # potential filelog related behavior.
249 if parent == 1:
250 if parent == 1:
250 othercopies.update(newcopies)
251 othercopies.update(newcopies)
251 else:
252 else:
252 newcopies.update(othercopies)
253 newcopies.update(othercopies)
253 all_copies[c] = newcopies
254 all_copies[c] = newcopies
254 assert False
255 assert False
255
256
256
257
257 def _forwardcopies(a, b, base=None, match=None):
258 def _forwardcopies(a, b, base=None, match=None):
258 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
259 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
259
260
260 if base is None:
261 if base is None:
261 base = a
262 base = a
262 match = a.repo().narrowmatch(match)
263 match = a.repo().narrowmatch(match)
263 # check for working copy
264 # check for working copy
264 if b.rev() is None:
265 if b.rev() is None:
265 cm = _committedforwardcopies(a, b.p1(), base, match)
266 cm = _committedforwardcopies(a, b.p1(), base, match)
266 # combine copies from dirstate if necessary
267 # combine copies from dirstate if necessary
267 copies = _chain(cm, _dirstatecopies(b._repo, match))
268 copies = _chain(cm, _dirstatecopies(b._repo, match))
268 else:
269 else:
269 copies = _committedforwardcopies(a, b, base, match)
270 copies = _committedforwardcopies(a, b, base, match)
270 return copies
271 return copies
271
272
272
273
273 def _backwardrenames(a, b, match):
274 def _backwardrenames(a, b, match):
274 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
275 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
275 return {}
276 return {}
276
277
277 # Even though we're not taking copies into account, 1:n rename situations
278 # Even though we're not taking copies into account, 1:n rename situations
278 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
279 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
279 # arbitrarily pick one of the renames.
280 # arbitrarily pick one of the renames.
280 # We don't want to pass in "match" here, since that would filter
281 # We don't want to pass in "match" here, since that would filter
281 # the destination by it. Since we're reversing the copies, we want
282 # the destination by it. Since we're reversing the copies, we want
282 # to filter the source instead.
283 # to filter the source instead.
283 f = _forwardcopies(b, a)
284 f = _forwardcopies(b, a)
284 r = {}
285 r = {}
285 for k, v in sorted(pycompat.iteritems(f)):
286 for k, v in sorted(pycompat.iteritems(f)):
286 if match and not match(v):
287 if match and not match(v):
287 continue
288 continue
288 # remove copies
289 # remove copies
289 if v in a:
290 if v in a:
290 continue
291 continue
291 r[v] = k
292 r[v] = k
292 return r
293 return r
293
294
294
295
295 def pathcopies(x, y, match=None):
296 def pathcopies(x, y, match=None):
296 """find {dst@y: src@x} copy mapping for directed compare"""
297 """find {dst@y: src@x} copy mapping for directed compare"""
297 repo = x._repo
298 repo = x._repo
298 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
299 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
299 if debug:
300 if debug:
300 repo.ui.debug(
301 repo.ui.debug(
301 b'debug.copies: searching copies from %s to %s\n' % (x, y)
302 b'debug.copies: searching copies from %s to %s\n' % (x, y)
302 )
303 )
303 if x == y or not x or not y:
304 if x == y or not x or not y:
304 return {}
305 return {}
305 a = y.ancestor(x)
306 a = y.ancestor(x)
306 if a == x:
307 if a == x:
307 if debug:
308 if debug:
308 repo.ui.debug(b'debug.copies: search mode: forward\n')
309 repo.ui.debug(b'debug.copies: search mode: forward\n')
309 if y.rev() is None and x == y.p1():
310 if y.rev() is None and x == y.p1():
310 # short-circuit to avoid issues with merge states
311 # short-circuit to avoid issues with merge states
311 return _dirstatecopies(repo, match)
312 return _dirstatecopies(repo, match)
312 copies = _forwardcopies(x, y, match=match)
313 copies = _forwardcopies(x, y, match=match)
313 elif a == y:
314 elif a == y:
314 if debug:
315 if debug:
315 repo.ui.debug(b'debug.copies: search mode: backward\n')
316 repo.ui.debug(b'debug.copies: search mode: backward\n')
316 copies = _backwardrenames(x, y, match=match)
317 copies = _backwardrenames(x, y, match=match)
317 else:
318 else:
318 if debug:
319 if debug:
319 repo.ui.debug(b'debug.copies: search mode: combined\n')
320 repo.ui.debug(b'debug.copies: search mode: combined\n')
320 base = None
321 base = None
321 if a.rev() != node.nullrev:
322 if a.rev() != node.nullrev:
322 base = x
323 base = x
323 copies = _chain(
324 copies = _chain(
324 _backwardrenames(x, a, match=match),
325 _backwardrenames(x, a, match=match),
325 _forwardcopies(a, y, base, match=match),
326 _forwardcopies(a, y, base, match=match),
326 )
327 )
327 _filter(x, y, copies)
328 _filter(x, y, copies)
328 return copies
329 return copies
329
330
330
331
331 def mergecopies(repo, c1, c2, base):
332 def mergecopies(repo, c1, c2, base):
332 """
333 """
333 Finds moves and copies between context c1 and c2 that are relevant for
334 Finds moves and copies between context c1 and c2 that are relevant for
334 merging. 'base' will be used as the merge base.
335 merging. 'base' will be used as the merge base.
335
336
336 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
337 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
337 files that were moved/ copied in one merge parent and modified in another.
338 files that were moved/ copied in one merge parent and modified in another.
338 For example:
339 For example:
339
340
340 o ---> 4 another commit
341 o ---> 4 another commit
341 |
342 |
342 | o ---> 3 commit that modifies a.txt
343 | o ---> 3 commit that modifies a.txt
343 | /
344 | /
344 o / ---> 2 commit that moves a.txt to b.txt
345 o / ---> 2 commit that moves a.txt to b.txt
345 |/
346 |/
346 o ---> 1 merge base
347 o ---> 1 merge base
347
348
348 If we try to rebase revision 3 on revision 4, since there is no a.txt in
349 If we try to rebase revision 3 on revision 4, since there is no a.txt in
349 revision 4, and if the user has copytrace disabled, we print the following
350 revision 4, and if the user has copytrace disabled, we print the following
350 message:
351 message:
351
352
352 ```other changed <file> which local deleted```
353 ```other changed <file> which local deleted```
353
354
354 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
355 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
355 "dirmove".
356 "dirmove".
356
357
357 "copy" is a mapping from destination name -> source name,
358 "copy" is a mapping from destination name -> source name,
358 where source is in c1 and destination is in c2 or vice-versa.
359 where source is in c1 and destination is in c2 or vice-versa.
359
360
360 "movewithdir" is a mapping from source name -> destination name,
361 "movewithdir" is a mapping from source name -> destination name,
361 where the file at source present in one context but not the other
362 where the file at source present in one context but not the other
362 needs to be moved to destination by the merge process, because the
363 needs to be moved to destination by the merge process, because the
363 other context moved the directory it is in.
364 other context moved the directory it is in.
364
365
365 "diverge" is a mapping of source name -> list of destination names
366 "diverge" is a mapping of source name -> list of destination names
366 for divergent renames.
367 for divergent renames.
367
368
368 "renamedelete" is a mapping of source name -> list of destination
369 "renamedelete" is a mapping of source name -> list of destination
369 names for files deleted in c1 that were renamed in c2 or vice-versa.
370 names for files deleted in c1 that were renamed in c2 or vice-versa.
370
371
371 "dirmove" is a mapping of detected source dir -> destination dir renames.
372 "dirmove" is a mapping of detected source dir -> destination dir renames.
372 This is needed for handling changes to new files previously grafted into
373 This is needed for handling changes to new files previously grafted into
373 renamed directories.
374 renamed directories.
374
375
375 This function calls different copytracing algorithms based on config.
376 This function calls different copytracing algorithms based on config.
376 """
377 """
377 # avoid silly behavior for update from empty dir
378 # avoid silly behavior for update from empty dir
378 if not c1 or not c2 or c1 == c2:
379 if not c1 or not c2 or c1 == c2:
379 return {}, {}, {}, {}, {}
380 return {}, {}, {}, {}, {}
380
381
381 narrowmatch = c1.repo().narrowmatch()
382 narrowmatch = c1.repo().narrowmatch()
382
383
383 # avoid silly behavior for parent -> working dir
384 # avoid silly behavior for parent -> working dir
384 if c2.node() is None and c1.node() == repo.dirstate.p1():
385 if c2.node() is None and c1.node() == repo.dirstate.p1():
385 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
386 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
386
387
387 copytracing = repo.ui.config(b'experimental', b'copytrace')
388 copytracing = repo.ui.config(b'experimental', b'copytrace')
388 if stringutil.parsebool(copytracing) is False:
389 if stringutil.parsebool(copytracing) is False:
389 # stringutil.parsebool() returns None when it is unable to parse the
390 # stringutil.parsebool() returns None when it is unable to parse the
390 # value, so we should rely on making sure copytracing is on such cases
391 # value, so we should rely on making sure copytracing is on such cases
391 return {}, {}, {}, {}, {}
392 return {}, {}, {}, {}, {}
392
393
393 if usechangesetcentricalgo(repo):
394 if usechangesetcentricalgo(repo):
394 # The heuristics don't make sense when we need changeset-centric algos
395 # The heuristics don't make sense when we need changeset-centric algos
395 return _fullcopytracing(repo, c1, c2, base)
396 return _fullcopytracing(repo, c1, c2, base)
396
397
397 # Copy trace disabling is explicitly below the node == p1 logic above
398 # Copy trace disabling is explicitly below the node == p1 logic above
398 # because the logic above is required for a simple copy to be kept across a
399 # because the logic above is required for a simple copy to be kept across a
399 # rebase.
400 # rebase.
400 if copytracing == b'heuristics':
401 if copytracing == b'heuristics':
401 # Do full copytracing if only non-public revisions are involved as
402 # Do full copytracing if only non-public revisions are involved as
402 # that will be fast enough and will also cover the copies which could
403 # that will be fast enough and will also cover the copies which could
403 # be missed by heuristics
404 # be missed by heuristics
404 if _isfullcopytraceable(repo, c1, base):
405 if _isfullcopytraceable(repo, c1, base):
405 return _fullcopytracing(repo, c1, c2, base)
406 return _fullcopytracing(repo, c1, c2, base)
406 return _heuristicscopytracing(repo, c1, c2, base)
407 return _heuristicscopytracing(repo, c1, c2, base)
407 else:
408 else:
408 return _fullcopytracing(repo, c1, c2, base)
409 return _fullcopytracing(repo, c1, c2, base)
409
410
410
411
411 def _isfullcopytraceable(repo, c1, base):
412 def _isfullcopytraceable(repo, c1, base):
412 """ Checks that if base, source and destination are all non-public branches,
413 """ Checks that if base, source and destination are all non-public branches,
413 if yes let's use the full copytrace algorithm for increased capabilities
414 if yes let's use the full copytrace algorithm for increased capabilities
414 since it will be fast enough.
415 since it will be fast enough.
415
416
416 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
417 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
417 number of changesets from c1 to base such that if number of changesets are
418 number of changesets from c1 to base such that if number of changesets are
418 more than the limit, full copytracing algorithm won't be used.
419 more than the limit, full copytracing algorithm won't be used.
419 """
420 """
420 if c1.rev() is None:
421 if c1.rev() is None:
421 c1 = c1.p1()
422 c1 = c1.p1()
422 if c1.mutable() and base.mutable():
423 if c1.mutable() and base.mutable():
423 sourcecommitlimit = repo.ui.configint(
424 sourcecommitlimit = repo.ui.configint(
424 b'experimental', b'copytrace.sourcecommitlimit'
425 b'experimental', b'copytrace.sourcecommitlimit'
425 )
426 )
426 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
427 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
427 return commits < sourcecommitlimit
428 return commits < sourcecommitlimit
428 return False
429 return False
429
430
430
431
431 def _checksinglesidecopies(
432 def _checksinglesidecopies(
432 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
433 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
433 ):
434 ):
434 if src not in m2:
435 if src not in m2:
435 # deleted on side 2
436 # deleted on side 2
436 if src not in m1:
437 if src not in m1:
437 # renamed on side 1, deleted on side 2
438 # renamed on side 1, deleted on side 2
438 renamedelete[src] = dsts1
439 renamedelete[src] = dsts1
439 elif m2[src] != mb[src]:
440 elif m2[src] != mb[src]:
440 if not _related(c2[src], base[src]):
441 if not _related(c2[src], base[src]):
441 return
442 return
442 # modified on side 2
443 # modified on side 2
443 for dst in dsts1:
444 for dst in dsts1:
444 if dst not in m2:
445 if dst not in m2:
445 # dst not added on side 2 (handle as regular
446 # dst not added on side 2 (handle as regular
446 # "both created" case in manifestmerge otherwise)
447 # "both created" case in manifestmerge otherwise)
447 copy[dst] = src
448 copy[dst] = src
448
449
449
450
450 def _fullcopytracing(repo, c1, c2, base):
451 def _fullcopytracing(repo, c1, c2, base):
451 """ The full copytracing algorithm which finds all the new files that were
452 """ The full copytracing algorithm which finds all the new files that were
452 added from merge base up to the top commit and for each file it checks if
453 added from merge base up to the top commit and for each file it checks if
453 this file was copied from another file.
454 this file was copied from another file.
454
455
455 This is pretty slow when a lot of changesets are involved but will track all
456 This is pretty slow when a lot of changesets are involved but will track all
456 the copies.
457 the copies.
457 """
458 """
458 m1 = c1.manifest()
459 m1 = c1.manifest()
459 m2 = c2.manifest()
460 m2 = c2.manifest()
460 mb = base.manifest()
461 mb = base.manifest()
461
462
462 copies1 = pathcopies(base, c1)
463 copies1 = pathcopies(base, c1)
463 copies2 = pathcopies(base, c2)
464 copies2 = pathcopies(base, c2)
464
465
465 inversecopies1 = {}
466 inversecopies1 = {}
466 inversecopies2 = {}
467 inversecopies2 = {}
467 for dst, src in copies1.items():
468 for dst, src in copies1.items():
468 inversecopies1.setdefault(src, []).append(dst)
469 inversecopies1.setdefault(src, []).append(dst)
469 for dst, src in copies2.items():
470 for dst, src in copies2.items():
470 inversecopies2.setdefault(src, []).append(dst)
471 inversecopies2.setdefault(src, []).append(dst)
471
472
472 copy = {}
473 copy = {}
473 diverge = {}
474 diverge = {}
474 renamedelete = {}
475 renamedelete = {}
475 allsources = set(inversecopies1) | set(inversecopies2)
476 allsources = set(inversecopies1) | set(inversecopies2)
476 for src in allsources:
477 for src in allsources:
477 dsts1 = inversecopies1.get(src)
478 dsts1 = inversecopies1.get(src)
478 dsts2 = inversecopies2.get(src)
479 dsts2 = inversecopies2.get(src)
479 if dsts1 and dsts2:
480 if dsts1 and dsts2:
480 # copied/renamed on both sides
481 # copied/renamed on both sides
481 if src not in m1 and src not in m2:
482 if src not in m1 and src not in m2:
482 # renamed on both sides
483 # renamed on both sides
483 dsts1 = set(dsts1)
484 dsts1 = set(dsts1)
484 dsts2 = set(dsts2)
485 dsts2 = set(dsts2)
485 # If there's some overlap in the rename destinations, we
486 # If there's some overlap in the rename destinations, we
486 # consider it not divergent. For example, if side 1 copies 'a'
487 # consider it not divergent. For example, if side 1 copies 'a'
487 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
488 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
488 # and 'd' and deletes 'a'.
489 # and 'd' and deletes 'a'.
489 if dsts1 & dsts2:
490 if dsts1 & dsts2:
490 for dst in dsts1 & dsts2:
491 for dst in dsts1 & dsts2:
491 copy[dst] = src
492 copy[dst] = src
492 else:
493 else:
493 diverge[src] = sorted(dsts1 | dsts2)
494 diverge[src] = sorted(dsts1 | dsts2)
494 elif src in m1 and src in m2:
495 elif src in m1 and src in m2:
495 # copied on both sides
496 # copied on both sides
496 dsts1 = set(dsts1)
497 dsts1 = set(dsts1)
497 dsts2 = set(dsts2)
498 dsts2 = set(dsts2)
498 for dst in dsts1 & dsts2:
499 for dst in dsts1 & dsts2:
499 copy[dst] = src
500 copy[dst] = src
500 # TODO: Handle cases where it was renamed on one side and copied
501 # TODO: Handle cases where it was renamed on one side and copied
501 # on the other side
502 # on the other side
502 elif dsts1:
503 elif dsts1:
503 # copied/renamed only on side 1
504 # copied/renamed only on side 1
504 _checksinglesidecopies(
505 _checksinglesidecopies(
505 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
506 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
506 )
507 )
507 elif dsts2:
508 elif dsts2:
508 # copied/renamed only on side 2
509 # copied/renamed only on side 2
509 _checksinglesidecopies(
510 _checksinglesidecopies(
510 src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
511 src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
511 )
512 )
512
513
513 renamedeleteset = set()
514 renamedeleteset = set()
514 divergeset = set()
515 divergeset = set()
515 for dsts in diverge.values():
516 for dsts in diverge.values():
516 divergeset.update(dsts)
517 divergeset.update(dsts)
517 for dsts in renamedelete.values():
518 for dsts in renamedelete.values():
518 renamedeleteset.update(dsts)
519 renamedeleteset.update(dsts)
519
520
520 # find interesting file sets from manifests
521 # find interesting file sets from manifests
521 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
522 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
522 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
523 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
523 u1 = sorted(addedinm1 - addedinm2)
524 u1 = sorted(addedinm1 - addedinm2)
524 u2 = sorted(addedinm2 - addedinm1)
525 u2 = sorted(addedinm2 - addedinm1)
525
526
526 header = b" unmatched files in %s"
527 header = b" unmatched files in %s"
527 if u1:
528 if u1:
528 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
529 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
529 if u2:
530 if u2:
530 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
531 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
531
532
532 fullcopy = copies1.copy()
533 fullcopy = copies1.copy()
533 fullcopy.update(copies2)
534 fullcopy.update(copies2)
534 if not fullcopy:
535 if not fullcopy:
535 return copy, {}, diverge, renamedelete, {}
536 return copy, {}, diverge, renamedelete, {}
536
537
537 if repo.ui.debugflag:
538 if repo.ui.debugflag:
538 repo.ui.debug(
539 repo.ui.debug(
539 b" all copies found (* = to merge, ! = divergent, "
540 b" all copies found (* = to merge, ! = divergent, "
540 b"% = renamed and deleted):\n"
541 b"% = renamed and deleted):\n"
541 )
542 )
542 for f in sorted(fullcopy):
543 for f in sorted(fullcopy):
543 note = b""
544 note = b""
544 if f in copy:
545 if f in copy:
545 note += b"*"
546 note += b"*"
546 if f in divergeset:
547 if f in divergeset:
547 note += b"!"
548 note += b"!"
548 if f in renamedeleteset:
549 if f in renamedeleteset:
549 note += b"%"
550 note += b"%"
550 repo.ui.debug(
551 repo.ui.debug(
551 b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
552 b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
552 )
553 )
553 del divergeset
554 del divergeset
554
555
555 repo.ui.debug(b" checking for directory renames\n")
556 repo.ui.debug(b" checking for directory renames\n")
556
557
557 # generate a directory move map
558 # generate a directory move map
558 d1, d2 = c1.dirs(), c2.dirs()
559 d1, d2 = c1.dirs(), c2.dirs()
559 invalid = set()
560 invalid = set()
560 dirmove = {}
561 dirmove = {}
561
562
562 # examine each file copy for a potential directory move, which is
563 # examine each file copy for a potential directory move, which is
563 # when all the files in a directory are moved to a new directory
564 # when all the files in a directory are moved to a new directory
564 for dst, src in pycompat.iteritems(fullcopy):
565 for dst, src in pycompat.iteritems(fullcopy):
565 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
566 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
566 if dsrc in invalid:
567 if dsrc in invalid:
567 # already seen to be uninteresting
568 # already seen to be uninteresting
568 continue
569 continue
569 elif dsrc in d1 and ddst in d1:
570 elif dsrc in d1 and ddst in d1:
570 # directory wasn't entirely moved locally
571 # directory wasn't entirely moved locally
571 invalid.add(dsrc)
572 invalid.add(dsrc)
572 elif dsrc in d2 and ddst in d2:
573 elif dsrc in d2 and ddst in d2:
573 # directory wasn't entirely moved remotely
574 # directory wasn't entirely moved remotely
574 invalid.add(dsrc)
575 invalid.add(dsrc)
575 elif dsrc in dirmove and dirmove[dsrc] != ddst:
576 elif dsrc in dirmove and dirmove[dsrc] != ddst:
576 # files from the same directory moved to two different places
577 # files from the same directory moved to two different places
577 invalid.add(dsrc)
578 invalid.add(dsrc)
578 else:
579 else:
579 # looks good so far
580 # looks good so far
580 dirmove[dsrc] = ddst
581 dirmove[dsrc] = ddst
581
582
582 for i in invalid:
583 for i in invalid:
583 if i in dirmove:
584 if i in dirmove:
584 del dirmove[i]
585 del dirmove[i]
585 del d1, d2, invalid
586 del d1, d2, invalid
586
587
587 if not dirmove:
588 if not dirmove:
588 return copy, {}, diverge, renamedelete, {}
589 return copy, {}, diverge, renamedelete, {}
589
590
590 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
591 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
591
592
592 for d in dirmove:
593 for d in dirmove:
593 repo.ui.debug(
594 repo.ui.debug(
594 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
595 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
595 )
596 )
596
597
597 movewithdir = {}
598 movewithdir = {}
598 # check unaccounted nonoverlapping files against directory moves
599 # check unaccounted nonoverlapping files against directory moves
599 for f in u1 + u2:
600 for f in u1 + u2:
600 if f not in fullcopy:
601 if f not in fullcopy:
601 for d in dirmove:
602 for d in dirmove:
602 if f.startswith(d):
603 if f.startswith(d):
603 # new file added in a directory that was moved, move it
604 # new file added in a directory that was moved, move it
604 df = dirmove[d] + f[len(d) :]
605 df = dirmove[d] + f[len(d) :]
605 if df not in copy:
606 if df not in copy:
606 movewithdir[f] = df
607 movewithdir[f] = df
607 repo.ui.debug(
608 repo.ui.debug(
608 b" pending file src: '%s' -> dst: '%s'\n"
609 b" pending file src: '%s' -> dst: '%s'\n"
609 % (f, df)
610 % (f, df)
610 )
611 )
611 break
612 break
612
613
613 return copy, movewithdir, diverge, renamedelete, dirmove
614 return copy, movewithdir, diverge, renamedelete, dirmove
614
615
615
616
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # working contexts have rev() == None; substitute their first parent so
    # the revset queries below operate on real revisions
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched on the (expected linear) chain from c2 down
    # to base; bail out to the full algorithm as soon as a merge is seen
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only base->c2 copies whose source still exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index the files newly introduced on the c1 side by basename and by
        # dirname; a missing file's rename candidates share one of the two
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            # keep every candidate sharing file history with the missing file
            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    # same 5-tuple shape as _fullcopytracing: the heuristics never produce
    # movewithdir/diverge/renamedelete/dirmove information
    return copies, {}, {}, {}, {}
723
724
724
725
725 def _related(f1, f2):
726 def _related(f1, f2):
726 """return True if f1 and f2 filectx have a common ancestor
727 """return True if f1 and f2 filectx have a common ancestor
727
728
728 Walk back to common ancestor to see if the two files originate
729 Walk back to common ancestor to see if the two files originate
729 from the same file. Since workingfilectx's rev() is None it messes
730 from the same file. Since workingfilectx's rev() is None it messes
730 up the integer comparison logic, hence the pre-step check for
731 up the integer comparison logic, hence the pre-step check for
731 None (f1 and f2 can only be workingfilectx's initially).
732 None (f1 and f2 can only be workingfilectx's initially).
732 """
733 """
733
734
734 if f1 == f2:
735 if f1 == f2:
735 return True # a match
736 return True # a match
736
737
737 g1, g2 = f1.ancestors(), f2.ancestors()
738 g1, g2 = f1.ancestors(), f2.ancestors()
738 try:
739 try:
739 f1r, f2r = f1.linkrev(), f2.linkrev()
740 f1r, f2r = f1.linkrev(), f2.linkrev()
740
741
741 if f1r is None:
742 if f1r is None:
742 f1 = next(g1)
743 f1 = next(g1)
743 if f2r is None:
744 if f2r is None:
744 f2 = next(g2)
745 f2 = next(g2)
745
746
746 while True:
747 while True:
747 f1r, f2r = f1.linkrev(), f2.linkrev()
748 f1r, f2r = f1.linkrev(), f2.linkrev()
748 if f1r > f2r:
749 if f1r > f2r:
749 f1 = next(g1)
750 f1 = next(g1)
750 elif f2r > f1r:
751 elif f2r > f1r:
751 f2 = next(g2)
752 f2 = next(g2)
752 else: # f1 and f2 point to files in the same linkrev
753 else: # f1 and f2 point to files in the same linkrev
753 return f1 == f2 # true if they point to the same file
754 return f1 == f2 # true if they point to the same file
754 except StopIteration:
755 except StopIteration:
755 return False
756 return False
756
757
757
758
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    # destinations whose copy records must NOT be re-applied
    exclude = {}
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    # compute the exclusion set only when copytracing is enabled
    # ('heuristics', an explicit true value, or unset which defaults on)
    if skiprev is not None and (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    ):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    # replay each surviving copy record onto the working context; only
    # files actually present in wctx can be marked
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst in exclude:
            continue
        if dst in wctx:
            wctx[dst].markcopied(src)
782
783
783
784
def computechangesetfilesadded(ctx):
    """Return the list of files added in a changeset.

    A file counts as added when it is listed in ctx.files() but present
    in none of the changeset's parents.
    """
    parents = ctx.parents()
    return [f for f in ctx.files() if not any(f in p for p in parents)]
792
793
793
794
def computechangesetfilesremoved(ctx):
    """Return the list of files removed in a changeset.

    A file counts as removed when it is listed in ctx.files() but is no
    longer present in the changeset itself.
    """
    return [f for f in ctx.files() if f not in ctx]
802
803
803
804
def computechangesetcopies(ctx):
    """Return the copies data for a changeset.

    The copies data are returned as a pair of dictionaries
    (p1copies, p2copies), each of the form `{newname: oldname}`.
    """
    copiesfromp1 = {}
    copiesfromp2 = {}
    parent1 = ctx.p1()
    parent2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # ignore files outside the narrowspec and files absent from ctx
        if not narrowmatch(dst):
            continue
        if dst not in ctx:
            continue
        rename = ctx[dst].renamed()
        if rename:
            src, srcnode = rename
            # attribute the copy to whichever parent holds the exact
            # source file node; p1 wins when both parents would match
            if src in parent1 and parent1[src].filenode() == srcnode:
                copiesfromp1[dst] = src
            elif src in parent2 and parent2[src].filenode() == srcnode:
                copiesfromp2[dst] = src
    return copiesfromp1, copiesfromp2
828
829
829
830
def encodecopies(files, copies):
    """Encode a copies dict ({dst: src}) against the *files* list.

    Each copy becomes "<index>\\0<source>" where <index> is the position
    of the destination in *files*; entries are newline-joined in file
    order. Raises ProgrammingError when a copy destination is missing
    from *files*.
    """
    encoded = [
        b'%d\0%s' % (idx, copies[dst])
        for idx, dst in enumerate(files)
        if dst in copies
    ]
    if len(encoded) != len(copies):
        # at least one copy target never appeared in the file list
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(encoded)
840
841
841
842
def decodecopies(files, data):
    """Decode copies *data* produced by encodecopies back into a dict.

    Returns {dst: src}, an empty dict for empty *data*, or None when
    the payload does not parse.
    """
    if not data:
        return {}
    copies = {}
    try:
        for line in data.split(b'\n'):
            strindex, src = line.split(b'\0')
            copies[files[int(strindex)]] = src
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
    return copies
857
858
858
859
def encodefileindices(files, subset):
    """Encode the positions of *subset* members within *files*.

    Returns newline-joined decimal indices, in *files* order.
    """
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % idx for idx, f in enumerate(files) if f in wanted
    )
866
867
867
868
def decodefileindices(files, data):
    """Decode index *data* produced by encodefileindices.

    Returns the corresponding subset of *files*, an empty list for
    empty *data*, or None when the payload does not parse or an index
    is out of range.
    """
    if not data:
        return []
    subset = []
    try:
        for chunk in data.split(b'\n'):
            idx = int(chunk)
            # reject out-of-range indices instead of wrapping negatives
            if not 0 <= idx < len(files):
                return None
            subset.append(files[idx])
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
    return subset
883
884
884
885
def _getsidedata(srcrepo, rev):
    """Compute the copies-related sidedata for revision *rev* of *srcrepo*.

    Returns a {category: encoded-bytes} mapping; empty when the changeset
    carries no copy, added-file or removed-file information.
    """
    ctx = srcrepo[rev]
    p1copies, p2copies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    # BUG FIX: the previous guard tested the (p1copies, p2copies) tuple
    # itself, which is always truthy, so the encoding work below ran even
    # for changesets with no copy metadata. Test the four collections
    # individually so empty changesets skip it entirely (same result:
    # nothing would have been stored anyway).
    if any([p1copies, p2copies, filesadded, filesremoved]):
        # all encoders index into the same sorted file list
        sortedfiles = sorted(ctx.files())
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        # only store non-empty payloads
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
907
908
908
909
def getsidedataadder(srcrepo, destrepo):
    """Return a sidedata companion callback that adds copies sidedata.

    The companion computes copy-tracing sidedata for changelog revisions;
    other revlogs get empty sidedata. It never requests rewrites or
    removals (first two elements of the returned tuple).
    """

    def sidedatacompanion(revlog, rev):
        sidedata = {}
        # only the changelog exposes a `filteredrevs` attribute; other
        # revlogs carry no copies sidedata
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion
917
918
918
919
def getsidedataremover(srcrepo, destrepo):
    """Return a sidedata companion callback that strips copies sidedata.

    The companion requests removal of the four copy-tracing sidedata
    categories from changelog revisions flagged as carrying sidedata;
    all other revisions and revlogs are left untouched.
    """

    def sidedatacompanion(revlog, rev):
        toremove = ()
        # only the changelog exposes a `filteredrevs` attribute
        ischangelog = util.safehasattr(revlog, 'filteredrevs')
        if ischangelog and revlog.flags(rev) & REVIDX_SIDEDATA:
            toremove = (
                sidedatamod.SD_P1COPIES,
                sidedatamod.SD_P2COPIES,
                sidedatamod.SD_FILESADDED,
                sidedatamod.SD_FILESREMOVED,
            )
        return False, toremove, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now