##// END OF EJS Templates
copies: get copies information directly from _copies...
marmoute -
r43547:82dabad5 default
parent child Browse files
Show More
@@ -1,931 +1,932 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16
16
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
18
18
19 from . import (
19 from . import (
20 error,
20 error,
21 match as matchmod,
21 match as matchmod,
22 node,
22 node,
23 pathutil,
23 pathutil,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28 from .revlogutils import sidedata as sidedatamod
28 from .revlogutils import sidedata as sidedatamod
29
29
30 from .utils import stringutil
30 from .utils import stringutil
31
31
32
32
33 def _filter(src, dst, t):
33 def _filter(src, dst, t):
34 """filters out invalid copies after chaining"""
34 """filters out invalid copies after chaining"""
35
35
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 # in the following table (not including trivial cases). For example, case 2
38 # in the following table (not including trivial cases). For example, case 2
39 # is where a file existed in 'src' and remained under that name in 'mid' and
39 # is where a file existed in 'src' and remained under that name in 'mid' and
40 # then was renamed between 'mid' and 'dst'.
40 # then was renamed between 'mid' and 'dst'.
41 #
41 #
42 # case src mid dst result
42 # case src mid dst result
43 # 1 x y - -
43 # 1 x y - -
44 # 2 x y y x->y
44 # 2 x y y x->y
45 # 3 x y x -
45 # 3 x y x -
46 # 4 x y z x->z
46 # 4 x y z x->z
47 # 5 - x y -
47 # 5 - x y -
48 # 6 x x y x->y
48 # 6 x x y x->y
49 #
49 #
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 # between 5 and 6, so it includes all cases in its result.
52 # between 5 and 6, so it includes all cases in its result.
53 # Cases 1, 3, and 5 are then removed by _filter().
53 # Cases 1, 3, and 5 are then removed by _filter().
54
54
55 for k, v in list(t.items()):
55 for k, v in list(t.items()):
56 # remove copies from files that didn't exist
56 # remove copies from files that didn't exist
57 if v not in src:
57 if v not in src:
58 del t[k]
58 del t[k]
59 # remove criss-crossed copies
59 # remove criss-crossed copies
60 elif k in src and v in dst:
60 elif k in src and v in dst:
61 del t[k]
61 del t[k]
62 # remove copies to files that were then removed
62 # remove copies to files that were then removed
63 elif k not in dst:
63 elif k not in dst:
64 del t[k]
64 del t[k]
65
65
66
66
def _chain(a, b):
    """chain two sets of copies 'a' and 'b'

    'a' maps {dst: src} from an earlier interval, 'b' from a later one;
    the result maps each destination in 'b' back through 'a' when the
    intermediate name was itself a copy destination.
    """
    t = a.copy()
    for k, v in pycompat.iteritems(b):
        if v in t:
            # 'v' was itself copied from t[v]: collapse the chain
            t[k] = t[v]
        else:
            t[k] = v
    return t
76
76
77
77
78 def _tracefile(fctx, am, basemf):
78 def _tracefile(fctx, am, basemf):
79 """return file context that is the ancestor of fctx present in ancestor
79 """return file context that is the ancestor of fctx present in ancestor
80 manifest am
80 manifest am
81
81
82 Note: we used to try and stop after a given limit, however checking if that
82 Note: we used to try and stop after a given limit, however checking if that
83 limit is reached turned out to be very expensive. we are better off
83 limit is reached turned out to be very expensive. we are better off
84 disabling that feature."""
84 disabling that feature."""
85
85
86 for f in fctx.ancestors():
86 for f in fctx.ancestors():
87 path = f.path()
87 path = f.path()
88 if am.get(path, None) == f.filenode():
88 if am.get(path, None) == f.filenode():
89 return path
89 return path
90 if basemf and basemf.get(path, None) == f.filenode():
90 if basemf and basemf.get(path, None) == f.filenode():
91 return path
91 return path
92
92
93
93
94 def _dirstatecopies(repo, match=None):
94 def _dirstatecopies(repo, match=None):
95 ds = repo.dirstate
95 ds = repo.dirstate
96 c = ds.copies().copy()
96 c = ds.copies().copy()
97 for k in list(c):
97 for k in list(c):
98 if ds[k] not in b'anm' or (match and not match(k)):
98 if ds[k] not in b'anm' or (match and not match(k)):
99 del c[k]
99 del c[k]
100 return c
100 return c
101
101
102
102
103 def _computeforwardmissing(a, b, match=None):
103 def _computeforwardmissing(a, b, match=None):
104 """Computes which files are in b but not a.
104 """Computes which files are in b but not a.
105 This is its own function so extensions can easily wrap this call to see what
105 This is its own function so extensions can easily wrap this call to see what
106 files _forwardcopies is about to process.
106 files _forwardcopies is about to process.
107 """
107 """
108 ma = a.manifest()
108 ma = a.manifest()
109 mb = b.manifest()
109 mb = b.manifest()
110 return mb.filesnotin(ma, match=match)
110 return mb.filesnotin(ma, match=match)
111
111
112
112
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage implies changeset-centric copy information
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource
120
120
121
121
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        # share one ancestry context across all traced files to avoid
        # recomputing ancestors for each of them
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
179
179
180
180
def _changesetforwardcopies(a, b, match):
    """find {dst@b: src@a} copy mapping using changeset-stored copy data

    Walks the changesets between `a` and `b`, chaining the per-changeset
    copy information recorded against the relevant parent.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo()
    children = {}
    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    # build a parent -> children map restricted to the revs we must walk
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)

    roots = set(children) - set(missingrevs)
    # 'work' contains shared ancestor of all copies, walked in revision order
    work = list(roots)
    all_copies = {r: {} for r in roots}
    heapq.heapify(work)
    alwaysmatch = match.always()
    while work:
        r = heapq.heappop(work)
        copies = all_copies.pop(r)
        if r == b.rev():
            return copies
        for i, c in enumerate(children[r]):
            childctx = repo[c]
            # fetch both parents' copy info in one go, directly from _copies
            p1copies, p2copies = childctx._copies
            if r == childctx.p1().rev():
                parent = 1
                childcopies = p1copies
            else:
                assert r == childctx.p2().rev()
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            # Copy the dict only if later iterations will also need it
            if i != len(children[r]) - 1:
                newcopies = copies.copy()
            else:
                newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
            for f in childctx.filesremoved():
                if f in newcopies:
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                heapq.heappush(work, c)
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    # b.rev() is always reachable from the roots, so we must return above
    assert False
254
255
255
256
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies
270
271
271
272
def _backwardrenames(a, b, match):
    """find renames from b back to its ancestor a ({src@a: dst@b} reversed)"""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
292
293
293
294
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        # walk back from x to the common ancestor, then forward to y
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
328
329
329
330
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
408
409
409
410
410 def _isfullcopytraceable(repo, c1, base):
411 def _isfullcopytraceable(repo, c1, base):
411 """ Checks that if base, source and destination are all no-public branches,
412 """ Checks that if base, source and destination are all no-public branches,
412 if yes let's use the full copytrace algorithm for increased capabilities
413 if yes let's use the full copytrace algorithm for increased capabilities
413 since it will be fast enough.
414 since it will be fast enough.
414
415
415 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
416 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
416 number of changesets from c1 to base such that if number of changesets are
417 number of changesets from c1 to base such that if number of changesets are
417 more than the limit, full copytracing algorithm won't be used.
418 more than the limit, full copytracing algorithm won't be used.
418 """
419 """
419 if c1.rev() is None:
420 if c1.rev() is None:
420 c1 = c1.p1()
421 c1 = c1.p1()
421 if c1.mutable() and base.mutable():
422 if c1.mutable() and base.mutable():
422 sourcecommitlimit = repo.ui.configint(
423 sourcecommitlimit = repo.ui.configint(
423 b'experimental', b'copytrace.sourcecommitlimit'
424 b'experimental', b'copytrace.sourcecommitlimit'
424 )
425 )
425 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
426 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
426 return commits < sourcecommitlimit
427 return commits < sourcecommitlimit
427 return False
428 return False
428
429
429
430
430 def _checksinglesidecopies(
431 def _checksinglesidecopies(
431 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
432 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
432 ):
433 ):
433 if src not in m2:
434 if src not in m2:
434 # deleted on side 2
435 # deleted on side 2
435 if src not in m1:
436 if src not in m1:
436 # renamed on side 1, deleted on side 2
437 # renamed on side 1, deleted on side 2
437 renamedelete[src] = dsts1
438 renamedelete[src] = dsts1
438 elif m2[src] != mb[src]:
439 elif m2[src] != mb[src]:
439 if not _related(c2[src], base[src]):
440 if not _related(c2[src], base[src]):
440 return
441 return
441 # modified on side 2
442 # modified on side 2
442 for dst in dsts1:
443 for dst in dsts1:
443 if dst not in m2:
444 if dst not in m2:
444 # dst not added on side 2 (handle as regular
445 # dst not added on side 2 (handle as regular
445 # "both created" case in manifestmerge otherwise)
446 # "both created" case in manifestmerge otherwise)
446 copy[dst] = src
447 copy[dst] = src
447
448
448
449
449 def _fullcopytracing(repo, c1, c2, base):
450 def _fullcopytracing(repo, c1, c2, base):
450 """ The full copytracing algorithm which finds all the new files that were
451 """ The full copytracing algorithm which finds all the new files that were
451 added from merge base up to the top commit and for each file it checks if
452 added from merge base up to the top commit and for each file it checks if
452 this file was copied from another file.
453 this file was copied from another file.
453
454
454 This is pretty slow when a lot of changesets are involved but will track all
455 This is pretty slow when a lot of changesets are involved but will track all
455 the copies.
456 the copies.
456 """
457 """
457 m1 = c1.manifest()
458 m1 = c1.manifest()
458 m2 = c2.manifest()
459 m2 = c2.manifest()
459 mb = base.manifest()
460 mb = base.manifest()
460
461
461 copies1 = pathcopies(base, c1)
462 copies1 = pathcopies(base, c1)
462 copies2 = pathcopies(base, c2)
463 copies2 = pathcopies(base, c2)
463
464
464 inversecopies1 = {}
465 inversecopies1 = {}
465 inversecopies2 = {}
466 inversecopies2 = {}
466 for dst, src in copies1.items():
467 for dst, src in copies1.items():
467 inversecopies1.setdefault(src, []).append(dst)
468 inversecopies1.setdefault(src, []).append(dst)
468 for dst, src in copies2.items():
469 for dst, src in copies2.items():
469 inversecopies2.setdefault(src, []).append(dst)
470 inversecopies2.setdefault(src, []).append(dst)
470
471
471 copy = {}
472 copy = {}
472 diverge = {}
473 diverge = {}
473 renamedelete = {}
474 renamedelete = {}
474 allsources = set(inversecopies1) | set(inversecopies2)
475 allsources = set(inversecopies1) | set(inversecopies2)
475 for src in allsources:
476 for src in allsources:
476 dsts1 = inversecopies1.get(src)
477 dsts1 = inversecopies1.get(src)
477 dsts2 = inversecopies2.get(src)
478 dsts2 = inversecopies2.get(src)
478 if dsts1 and dsts2:
479 if dsts1 and dsts2:
479 # copied/renamed on both sides
480 # copied/renamed on both sides
480 if src not in m1 and src not in m2:
481 if src not in m1 and src not in m2:
481 # renamed on both sides
482 # renamed on both sides
482 dsts1 = set(dsts1)
483 dsts1 = set(dsts1)
483 dsts2 = set(dsts2)
484 dsts2 = set(dsts2)
484 # If there's some overlap in the rename destinations, we
485 # If there's some overlap in the rename destinations, we
485 # consider it not divergent. For example, if side 1 copies 'a'
486 # consider it not divergent. For example, if side 1 copies 'a'
486 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
487 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
487 # and 'd' and deletes 'a'.
488 # and 'd' and deletes 'a'.
488 if dsts1 & dsts2:
489 if dsts1 & dsts2:
489 for dst in dsts1 & dsts2:
490 for dst in dsts1 & dsts2:
490 copy[dst] = src
491 copy[dst] = src
491 else:
492 else:
492 diverge[src] = sorted(dsts1 | dsts2)
493 diverge[src] = sorted(dsts1 | dsts2)
493 elif src in m1 and src in m2:
494 elif src in m1 and src in m2:
494 # copied on both sides
495 # copied on both sides
495 dsts1 = set(dsts1)
496 dsts1 = set(dsts1)
496 dsts2 = set(dsts2)
497 dsts2 = set(dsts2)
497 for dst in dsts1 & dsts2:
498 for dst in dsts1 & dsts2:
498 copy[dst] = src
499 copy[dst] = src
499 # TODO: Handle cases where it was renamed on one side and copied
500 # TODO: Handle cases where it was renamed on one side and copied
500 # on the other side
501 # on the other side
501 elif dsts1:
502 elif dsts1:
502 # copied/renamed only on side 1
503 # copied/renamed only on side 1
503 _checksinglesidecopies(
504 _checksinglesidecopies(
504 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
505 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
505 )
506 )
506 elif dsts2:
507 elif dsts2:
507 # copied/renamed only on side 2
508 # copied/renamed only on side 2
508 _checksinglesidecopies(
509 _checksinglesidecopies(
509 src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
510 src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
510 )
511 )
511
512
512 renamedeleteset = set()
513 renamedeleteset = set()
513 divergeset = set()
514 divergeset = set()
514 for dsts in diverge.values():
515 for dsts in diverge.values():
515 divergeset.update(dsts)
516 divergeset.update(dsts)
516 for dsts in renamedelete.values():
517 for dsts in renamedelete.values():
517 renamedeleteset.update(dsts)
518 renamedeleteset.update(dsts)
518
519
519 # find interesting file sets from manifests
520 # find interesting file sets from manifests
520 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
521 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
521 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
522 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
522 u1 = sorted(addedinm1 - addedinm2)
523 u1 = sorted(addedinm1 - addedinm2)
523 u2 = sorted(addedinm2 - addedinm1)
524 u2 = sorted(addedinm2 - addedinm1)
524
525
525 header = b" unmatched files in %s"
526 header = b" unmatched files in %s"
526 if u1:
527 if u1:
527 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
528 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
528 if u2:
529 if u2:
529 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
530 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
530
531
531 fullcopy = copies1.copy()
532 fullcopy = copies1.copy()
532 fullcopy.update(copies2)
533 fullcopy.update(copies2)
533 if not fullcopy:
534 if not fullcopy:
534 return copy, {}, diverge, renamedelete, {}
535 return copy, {}, diverge, renamedelete, {}
535
536
536 if repo.ui.debugflag:
537 if repo.ui.debugflag:
537 repo.ui.debug(
538 repo.ui.debug(
538 b" all copies found (* = to merge, ! = divergent, "
539 b" all copies found (* = to merge, ! = divergent, "
539 b"% = renamed and deleted):\n"
540 b"% = renamed and deleted):\n"
540 )
541 )
541 for f in sorted(fullcopy):
542 for f in sorted(fullcopy):
542 note = b""
543 note = b""
543 if f in copy:
544 if f in copy:
544 note += b"*"
545 note += b"*"
545 if f in divergeset:
546 if f in divergeset:
546 note += b"!"
547 note += b"!"
547 if f in renamedeleteset:
548 if f in renamedeleteset:
548 note += b"%"
549 note += b"%"
549 repo.ui.debug(
550 repo.ui.debug(
550 b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
551 b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
551 )
552 )
552 del divergeset
553 del divergeset
553
554
554 repo.ui.debug(b" checking for directory renames\n")
555 repo.ui.debug(b" checking for directory renames\n")
555
556
556 # generate a directory move map
557 # generate a directory move map
557 d1, d2 = c1.dirs(), c2.dirs()
558 d1, d2 = c1.dirs(), c2.dirs()
558 invalid = set()
559 invalid = set()
559 dirmove = {}
560 dirmove = {}
560
561
561 # examine each file copy for a potential directory move, which is
562 # examine each file copy for a potential directory move, which is
562 # when all the files in a directory are moved to a new directory
563 # when all the files in a directory are moved to a new directory
563 for dst, src in pycompat.iteritems(fullcopy):
564 for dst, src in pycompat.iteritems(fullcopy):
564 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
565 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
565 if dsrc in invalid:
566 if dsrc in invalid:
566 # already seen to be uninteresting
567 # already seen to be uninteresting
567 continue
568 continue
568 elif dsrc in d1 and ddst in d1:
569 elif dsrc in d1 and ddst in d1:
569 # directory wasn't entirely moved locally
570 # directory wasn't entirely moved locally
570 invalid.add(dsrc)
571 invalid.add(dsrc)
571 elif dsrc in d2 and ddst in d2:
572 elif dsrc in d2 and ddst in d2:
572 # directory wasn't entirely moved remotely
573 # directory wasn't entirely moved remotely
573 invalid.add(dsrc)
574 invalid.add(dsrc)
574 elif dsrc in dirmove and dirmove[dsrc] != ddst:
575 elif dsrc in dirmove and dirmove[dsrc] != ddst:
575 # files from the same directory moved to two different places
576 # files from the same directory moved to two different places
576 invalid.add(dsrc)
577 invalid.add(dsrc)
577 else:
578 else:
578 # looks good so far
579 # looks good so far
579 dirmove[dsrc] = ddst
580 dirmove[dsrc] = ddst
580
581
581 for i in invalid:
582 for i in invalid:
582 if i in dirmove:
583 if i in dirmove:
583 del dirmove[i]
584 del dirmove[i]
584 del d1, d2, invalid
585 del d1, d2, invalid
585
586
586 if not dirmove:
587 if not dirmove:
587 return copy, {}, diverge, renamedelete, {}
588 return copy, {}, diverge, renamedelete, {}
588
589
589 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
590 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
590
591
591 for d in dirmove:
592 for d in dirmove:
592 repo.ui.debug(
593 repo.ui.debug(
593 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
594 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
594 )
595 )
595
596
596 movewithdir = {}
597 movewithdir = {}
597 # check unaccounted nonoverlapping files against directory moves
598 # check unaccounted nonoverlapping files against directory moves
598 for f in u1 + u2:
599 for f in u1 + u2:
599 if f not in fullcopy:
600 if f not in fullcopy:
600 for d in dirmove:
601 for d in dirmove:
601 if f.startswith(d):
602 if f.startswith(d):
602 # new file added in a directory that was moved, move it
603 # new file added in a directory that was moved, move it
603 df = dirmove[d] + f[len(d) :]
604 df = dirmove[d] + f[len(d) :]
604 if df not in copy:
605 if df not in copy:
605 movewithdir[f] = df
606 movewithdir[f] = df
606 repo.ui.debug(
607 repo.ui.debug(
607 b" pending file src: '%s' -> dst: '%s'\n"
608 b" pending file src: '%s' -> dst: '%s'\n"
608 % (f, df)
609 % (f, df)
609 )
610 )
610 break
611 break
611
612
612 return copy, movewithdir, diverge, renamedelete, dirmove
613 return copy, movewithdir, diverge, renamedelete, dirmove
613
614
614
615
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # working-copy contexts have rev() None; fall back to their first parent
    # so the revision-number based checks below work
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # walk from c2 back to base, collecting every file touched along the way;
    # bail out to the full algorithm as soon as a merge commit is seen
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only the copies whose source still exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index the files added on the c1 side by basename and by dirname so
        # we can cheaply find plausible rename targets for each missing file
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
722
723
723
724
724 def _related(f1, f2):
725 def _related(f1, f2):
725 """return True if f1 and f2 filectx have a common ancestor
726 """return True if f1 and f2 filectx have a common ancestor
726
727
727 Walk back to common ancestor to see if the two files originate
728 Walk back to common ancestor to see if the two files originate
728 from the same file. Since workingfilectx's rev() is None it messes
729 from the same file. Since workingfilectx's rev() is None it messes
729 up the integer comparison logic, hence the pre-step check for
730 up the integer comparison logic, hence the pre-step check for
730 None (f1 and f2 can only be workingfilectx's initially).
731 None (f1 and f2 can only be workingfilectx's initially).
731 """
732 """
732
733
733 if f1 == f2:
734 if f1 == f2:
734 return True # a match
735 return True # a match
735
736
736 g1, g2 = f1.ancestors(), f2.ancestors()
737 g1, g2 = f1.ancestors(), f2.ancestors()
737 try:
738 try:
738 f1r, f2r = f1.linkrev(), f2.linkrev()
739 f1r, f2r = f1.linkrev(), f2.linkrev()
739
740
740 if f1r is None:
741 if f1r is None:
741 f1 = next(g1)
742 f1 = next(g1)
742 if f2r is None:
743 if f2r is None:
743 f2 = next(g2)
744 f2 = next(g2)
744
745
745 while True:
746 while True:
746 f1r, f2r = f1.linkrev(), f2.linkrev()
747 f1r, f2r = f1.linkrev(), f2.linkrev()
747 if f1r > f2r:
748 if f1r > f2r:
748 f1 = next(g1)
749 f1 = next(g1)
749 elif f2r > f1r:
750 elif f2r > f1r:
750 f2 = next(g2)
751 f2 = next(g2)
751 else: # f1 and f2 point to files in the same linkrev
752 else: # f1 and f2 point to files in the same linkrev
752 return f1 == f2 # true if they point to the same file
753 return f1 == f2 # true if they point to the same file
753 except StopIteration:
754 except StopIteration:
754 return False
755 return False
755
756
756
757
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    tracingenabled = (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    )
    exclude = {}
    if skiprev is not None and tracingenabled:
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    allcopies = pathcopies(repo[fromrev], repo[rev])
    for dst, src in pycompat.iteritems(allcopies):
        # skip filtered copies and destinations absent from the working ctx
        if dst in exclude or dst not in wctx:
            continue
        wctx[dst].markcopied(src)
781
782
782
783
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset
    """
    # a file is "added" when no parent of the changeset contains it
    parents = ctx.parents()
    return [f for f in ctx.files() if not any(f in p for p in parents)]
791
792
792
793
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset
    """
    # a touched file missing from the changeset itself was removed by it
    return [f for f in ctx.files() if f not in ctx]
801
802
802
803
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionnary (p1copies, p2copies).

    Each dictionnary are in the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    parent1 = ctx.p1()
    parent2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # ignore files outside the narrowspec
        if not narrowmatch(dst):
            continue
        # files removed by the changeset have no copy information
        if dst not in ctx:
            continue
        rename = ctx[dst].renamed()
        if not rename:
            continue
        src, srcnode = rename
        # attribute the copy to whichever parent actually holds the exact
        # source filenode; otherwise the record is ignored
        if src in parent1 and parent1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in parent2 and parent2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies
827
828
828
829
def encodecopies(files, copies):
    """encode `copies` as "<index>\\0<source>" lines relative to `files`

    Every copy destination must appear in `files`; a mismatch means the
    caller built an inconsistent file list and is a programming error.
    """
    entries = []
    for index, dst in enumerate(files):
        if dst in copies:
            entries.append(b'%d\0%s' % (index, copies[dst]))
    if len(entries) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(entries)
839
840
840
841
def decodecopies(files, data):
    """decode "<index>\\0<source>" lines into a {dst: src} dict

    Returns None when `data` does not parse, since another extension may
    have stored an unrelated value under the same sidedata/extra key.
    """
    copies = {}
    if not data:
        return copies
    try:
        for line in data.split(b'\n'):
            strindex, src = line.split(b'\0')
            copies[files[int(strindex)]] = src
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
    return copies
856
857
857
858
def encodefileindices(files, subset):
    """encode the members of `subset` as newline-separated indices into
    `files` (in `files` order)"""
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % index for index, f in enumerate(files) if f in wanted
    )
865
866
866
867
def decodefileindices(files, data):
    """decode newline-separated indices back into the matching `files`

    Returns None when `data` does not parse or holds an out-of-range
    index, since another extension may have stored an unrelated value
    under the same key.
    """
    subset = []
    if not data:
        return subset
    try:
        for token in data.split(b'\n'):
            index = int(token)
            if not (0 <= index < len(files)):
                return None
            subset.append(files[index])
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
    return subset
882
883
883
884
def _getsidedata(srcrepo, rev):
    """build the copy-tracing sidedata dict for revision `rev` of `srcrepo`

    Only non-empty encoded values are stored, so an empty dict means the
    revision carries no copy information.
    """
    ctx = srcrepo[rev]
    copiesdata = computechangesetcopies(ctx)
    added = computechangesetfilesadded(ctx)
    removed = computechangesetfilesremoved(ctx)
    sidedata = {}
    if any([copiesdata, added, removed]):
        allfiles = sorted(ctx.files())
        p1copies, p2copies = copiesdata
        encoded = [
            (sidedatamod.SD_P1COPIES, encodecopies(allfiles, p1copies)),
            (sidedatamod.SD_P2COPIES, encodecopies(allfiles, p2copies)),
            (sidedatamod.SD_FILESADDED, encodefileindices(allfiles, added)),
            (
                sidedatamod.SD_FILESREMOVED,
                encodefileindices(allfiles, removed),
            ),
        ]
        for key, value in encoded:
            if value:
                sidedata[key] = value
    return sidedata
906
907
907
908
def getsidedataadder(srcrepo, destrepo):
    """return a sidedata companion that adds copy sidedata to changelog revs"""

    def sidedatacompanion(revlog, rev):
        # only changelogs carry a `filteredrevs` attribute; other revlogs
        # get no sidedata from this companion
        if not util.safehasattr(revlog, 'filteredrevs'):
            return False, (), {}
        return False, (), _getsidedata(srcrepo, rev)

    return sidedatacompanion
916
917
917
918
def getsidedataremover(srcrepo, destrepo):
    """return a sidedata companion that strips copy sidedata from changelog
    revisions that carry the sidedata flag"""
    copieskeys = (
        sidedatamod.SD_P1COPIES,
        sidedatamod.SD_P2COPIES,
        sidedatamod.SD_FILESADDED,
        sidedatamod.SD_FILESREMOVED,
    )

    def sidedatacompanion(revlog, rev):
        removedkeys = ()
        # only changelogs carry a `filteredrevs` attribute
        if util.safehasattr(revlog, 'filteredrevs'):
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                removedkeys = copieskeys
        return False, removedkeys, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now