upstream/mercurial-mirror Commit - r46217:2693659c

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import collections

10

import collections

11

import os

11

import os

12

13

from .i18n import _

13

from .i18n import _

14

15

16

from . import (

16

from . import (

17

match as matchmod,

17

match as matchmod,

18

node,

18

node,

19

pathutil,

19

pathutil,

20

pycompat,

20

pycompat,

21

util,

21

util,

22

)

22

)

23

24

25

from .utils import stringutil

25

from .utils import stringutil

26

27

28

def _filter(src, dst, t):

28

def _filter(src, dst, t):

29

"""filters out invalid copies after chaining"""

29

"""filters out invalid copies after chaining"""

30

31

# When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')

31

# When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')

32

# with copies in 'b' (from 'mid' to 'dst'), we can get the different cases

32

# with copies in 'b' (from 'mid' to 'dst'), we can get the different cases

33

# in the following table (not including trivial cases). For example, case 2

33

# in the following table (not including trivial cases). For example, case 2

34

# is where a file existed in 'src' and remained under that name in 'mid' and

34

# is where a file existed in 'src' and remained under that name in 'mid' and

35

# then was renamed between 'mid' and 'dst'.

35

# then was renamed between 'mid' and 'dst'.

36

#

36

#

37

# case src mid dst result

37

# case src mid dst result

38

# 1 x y - -

38

# 1 x y - -

39

# 2 x y y x->y

39

# 2 x y y x->y

40

# 3 x y x -

40

# 3 x y x -

41

# 4 x y z x->z

41

# 4 x y z x->z

42

# 5 - x y -

42

# 5 - x y -

43

# 6 x x y x->y

43

# 6 x x y x->y

44

#

44

#

45

# _chain() takes care of chaining the copies in 'a' and 'b', but it

45

# _chain() takes care of chaining the copies in 'a' and 'b', but it

46

# cannot tell the difference between cases 1 and 2, between 3 and 4, or

46

# cannot tell the difference between cases 1 and 2, between 3 and 4, or

47

# between 5 and 6, so it includes all cases in its result.

47

# between 5 and 6, so it includes all cases in its result.

48

# Cases 1, 3, and 5 are then removed by _filter().

48

# Cases 1, 3, and 5 are then removed by _filter().

49

50

for k, v in list(t.items()):

50

for k, v in list(t.items()):

51

# remove copies from files that didn't exist

51

# remove copies from files that didn't exist

52

if v not in src:

52

if v not in src:

53

del t[k]

53

del t[k]

54

# remove criss-crossed copies

54

# remove criss-crossed copies

55

elif k in src and v in dst:

55

elif k in src and v in dst:

56

del t[k]

56

del t[k]

57

# remove copies to files that were then removed

57

# remove copies to files that were then removed

58

elif k not in dst:

58

elif k not in dst:

59

del t[k]

59

del t[k]

60

61

62

def _chain(prefix, suffix):

62

def _chain(prefix, suffix):

63

"""chain two sets of copies 'prefix' and 'suffix'"""

63

"""chain two sets of copies 'prefix' and 'suffix'"""

64

result = prefix.copy()

64

result = prefix.copy()

65

for key, value in pycompat.iteritems(suffix):

65

for key, value in pycompat.iteritems(suffix):

66

result[key] = prefix.get(value, value)

66

result[key] = prefix.get(value, value)

67

return result

67

return result

68

69

70

def _tracefile(fctx, am, basemf):

70

def _tracefile(fctx, am, basemf):

71

"""return file context that is the ancestor of fctx present in ancestor

71

"""return file context that is the ancestor of fctx present in ancestor

72

manifest am

72

manifest am

73

74

Note: we used to try and stop after a given limit, however checking if that

74

Note: we used to try and stop after a given limit, however checking if that

75

limit is reached turned out to be very expensive. we are better off

75

limit is reached turned out to be very expensive. we are better off

76

disabling that feature."""

76

disabling that feature."""

77

78

for f in fctx.ancestors():

78

for f in fctx.ancestors():

79

path = f.path()

79

path = f.path()

80

if am.get(path, None) == f.filenode():

80

if am.get(path, None) == f.filenode():

81

return path

81

return path

82

if basemf and basemf.get(path, None) == f.filenode():

82

if basemf and basemf.get(path, None) == f.filenode():

83

return path

83

return path

84

85

86

def _dirstatecopies(repo, match=None):

86

def _dirstatecopies(repo, match=None):

87

ds = repo.dirstate

87

ds = repo.dirstate

88

c = ds.copies().copy()

88

c = ds.copies().copy()

89

for k in list(c):

89

for k in list(c):

90

if ds[k] not in b'anm' or (match and not match(k)):

90

if ds[k] not in b'anm' or (match and not match(k)):

91

del c[k]

91

del c[k]

92

return c

92

return c

93

94

95

def _computeforwardmissing(a, b, match=None):

95

def _computeforwardmissing(a, b, match=None):

96

"""Computes which files are in b but not a.

96

"""Computes which files are in b but not a.

97

This is its own function so extensions can easily wrap this call to see what

97

This is its own function so extensions can easily wrap this call to see what

98

files _forwardcopies is about to process.

98

files _forwardcopies is about to process.

99

"""

99

"""

100

ma = a.manifest()

100

ma = a.manifest()

101

mb = b.manifest()

101

mb = b.manifest()

102

return mb.filesnotin(ma, match=match)

102

return mb.filesnotin(ma, match=match)

103

104

105

def usechangesetcentricalgo(repo):

105

def usechangesetcentricalgo(repo):

106

"""Checks if we should use changeset-centric copy algorithms"""

106

"""Checks if we should use changeset-centric copy algorithms"""

107

if repo.filecopiesmode == b'changeset-sidedata':

107

if repo.filecopiesmode == b'changeset-sidedata':

108

return True

108

return True

109

readfrom = repo.ui.config(b'experimental', b'copies.read-from')

109

readfrom = repo.ui.config(b'experimental', b'copies.read-from')

110

changesetsource = (b'changeset-only', b'compatibility')

110

changesetsource = (b'changeset-only', b'compatibility')

111

return readfrom in changesetsource

111

return readfrom in changesetsource

112

113

114

def _committedforwardcopies(a, b, base, match):

114

def _committedforwardcopies(a, b, base, match):

115

"""Like _forwardcopies(), but b.rev() cannot be None (working copy)"""

115

"""Like _forwardcopies(), but b.rev() cannot be None (working copy)"""

116

# files might have to be traced back to the fctx parent of the last

116

# files might have to be traced back to the fctx parent of the last

117

# one-side-only changeset, but not further back than that

117

# one-side-only changeset, but not further back than that

118

repo = a._repo

118

repo = a._repo

119

120

if usechangesetcentricalgo(repo):

120

if usechangesetcentricalgo(repo):

121

return _changesetforwardcopies(a, b, match)

121

return _changesetforwardcopies(a, b, match)

122

123

debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

123

debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

124

dbg = repo.ui.debug

124

dbg = repo.ui.debug

125

if debug:

125

if debug:

126

dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))

126

dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))

127

am = a.manifest()

127

am = a.manifest()

128

basemf = None if base is None else base.manifest()

128

basemf = None if base is None else base.manifest()

129

130

# find where new files came from

130

# find where new files came from

131

# we currently don't try to find where old files went, too expensive

131

# we currently don't try to find where old files went, too expensive

132

# this means we can miss a case like 'hg rm b; hg cp a b'

132

# this means we can miss a case like 'hg rm b; hg cp a b'

133

cm = {}

133

cm = {}

134

135

# Computing the forward missing is quite expensive on large manifests, since

135

# Computing the forward missing is quite expensive on large manifests, since

136

# it compares the entire manifests. We can optimize it in the common use

136

# it compares the entire manifests. We can optimize it in the common use

137

# case of computing what copies are in a commit versus its parent (like

137

# case of computing what copies are in a commit versus its parent (like

138

# during a rebase or histedit). Note, we exclude merge commits from this

138

# during a rebase or histedit). Note, we exclude merge commits from this

139

# optimization, since the ctx.files() for a merge commit is not correct for

139

# optimization, since the ctx.files() for a merge commit is not correct for

140

# this comparison.

140

# this comparison.

141

forwardmissingmatch = match

141

forwardmissingmatch = match

142

if b.p1() == a and b.p2().node() == node.nullid:

142

if b.p1() == a and b.p2().node() == node.nullid:

143

filesmatcher = matchmod.exact(b.files())

143

filesmatcher = matchmod.exact(b.files())

144

forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)

144

forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)

145

missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

145

missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

146

147

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

147

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

148

149

if debug:

149

if debug:

150

dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

150

dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

151

152

for f in sorted(missing):

152

for f in sorted(missing):

153

if debug:

153

if debug:

154

dbg(b'debug.copies: tracing file: %s\n' % f)

154

dbg(b'debug.copies: tracing file: %s\n' % f)

155

fctx = b[f]

155

fctx = b[f]

156

fctx._ancestrycontext = ancestrycontext

156

fctx._ancestrycontext = ancestrycontext

157

158

if debug:

158

if debug:

159

start = util.timer()

159

start = util.timer()

160

opath = _tracefile(fctx, am, basemf)

160

opath = _tracefile(fctx, am, basemf)

161

if opath:

161

if opath:

162

if debug:

162

if debug:

163

dbg(b'debug.copies: rename of: %s\n' % opath)

163

dbg(b'debug.copies: rename of: %s\n' % opath)

164

cm[f] = opath

164

cm[f] = opath

165

if debug:

165

if debug:

166

dbg(

166

dbg(

167

b'debug.copies: time: %f seconds\n'

167

b'debug.copies: time: %f seconds\n'

168

% (util.timer() - start)

168

% (util.timer() - start)

169

)

169

)

170

return cm

170

return cm

171

172

173

def _revinfo_getter(repo):

173

def _revinfo_getter(repo):

174

"""return a function that return ~~multiple~~ data given a <rev>"i

174

"""returns a function that returns the following data given a <rev>"

175

176

* p1: revision number of first parent

176

* p1: revision number of first parent

177

* p2: revision number of second parent

177

* p2: revision number of second parent

178

* p1copies: mapping of copies from p1

178

* changes: a ChangingFiles object

179

* p2copies: mapping of copies from p2

180

* removed: a list of removed files

181

* ismerged: a callback to know if file was merged in that revision

182

"""

179

"""

183

cl = repo.changelog

180

cl = repo.changelog

184

parents = cl.parentrevs

181

parents = cl.parentrevs

185

182

186

def get_ismerged(rev):

187

ctx = repo[rev]

188

189

def ismerged(path):

190

if path not in ctx.files():

191

return False

192

fctx = ctx[path]

193

parents = fctx._filelog.parents(fctx._filenode)

194

nb_parents = 0

195

for n in parents:

196

if n != node.nullid:

197

nb_parents += 1

198

return nb_parents >= 2

199

200

return ismerged

201

202

changelogrevision = cl.changelogrevision

183

changelogrevision = cl.changelogrevision

203

184

204

# A small cache to avoid doing the work twice for merges

185

# A small cache to avoid doing the work twice for merges

205

#

186

#

206

# In the vast majority of cases, if we ask information for a revision

187

# In the vast majority of cases, if we ask information for a revision

207

# about 1 parent, we'll later ask it for the other. So it makes sense to

188

# about 1 parent, we'll later ask it for the other. So it makes sense to

208

# keep the information around when reaching the first parent of a merge

189

# keep the information around when reaching the first parent of a merge

209

# and drop it after it was provided for the second parent.

190

# and drop it after it was provided for the second parent.

210

#

191

#

211

# There exist cases where only one parent of the merge will be walked. It

192

# There exist cases where only one parent of the merge will be walked. It

212

# happens when the "destination" of the copy tracing is descended from a

193

# happens when the "destination" of the copy tracing is descended from a

213

# new root, not common with the "source". In that case, we will only walk

194

# new root, not common with the "source". In that case, we will only walk

214

# through merge parents that are descendant of changesets common

195

# through merge parents that are descendant of changesets common

215

# between "source" and "destination".

196

# between "source" and "destination".

216

#

197

#

217

# With the current case implementation if such changesets have a copy

198

# With the current case implementation if such changesets have a copy

218

# information, we'll keep them in memory until the end of

199

# information, we'll keep them in memory until the end of

219

# _changesetforwardcopies. We don't expect the case to be frequent

200

# _changesetforwardcopies. We don't expect the case to be frequent

220

# enough to matter.

201

# enough to matter.

221

#

202

#

222

# In addition, it would be possible to reach a pathological case, where

203

# In addition, it would be possible to reach a pathological case, where

223

# many first parents are met before any second parent is reached. In

204

# many first parents are met before any second parent is reached. In

224

# that case the cache could grow. If this ever becomes an issue one can

205

# that case the cache could grow. If this ever becomes an issue one can

225

# safely introduce a maximum cache size. This would trade extra CPU/IO

206

# safely introduce a maximum cache size. This would trade extra CPU/IO

226

# time to save memory.

207

# time to save memory.

227

merge_caches = {}

208

merge_caches = {}

228

209

229

def revinfo(rev):

210

def revinfo(rev):

230

p1, p2 = parents(rev)

211

p1, p2 = parents(rev)

231

value = None

212

value = None

232

e = merge_caches.pop(rev, None)

213

e = merge_caches.pop(rev, None)

233

if e is not None:

214

if e is not None:

234

return e

215

return e

235

c = changelogrevision(rev)

216

value = (p1, p2, changelogrevision(rev).changes)

236

p1copies = c.p1copies

237

p2copies = c.p2copies

238

removed = c.filesremoved

239

if p1 != node.nullrev and p2 != node.nullrev:

217

if p1 != node.nullrev and p2 != node.nullrev:

240

# XXX some case we over cache, IGNORE

218

# XXX some case we over cache, IGNORE

241

~~value~~ = merge_caches[rev] = (

219

merge_caches[rev] = value

242

p1,

243

p2,

244

p1copies,

245

p2copies,

246

removed,

247

get_ismerged(rev),

248

)

249

250

if value is None:

251

value = (p1, p2, p1copies, p2copies, removed, get_ismerged(rev))

252

return value

220

return value

253

221

254

return revinfo

222

return revinfo

255

223

256

224

257

def _changesetforwardcopies(a, b, match):

225

def _changesetforwardcopies(a, b, match):

258

if a.rev() in (node.nullrev, b.rev()):

226

if a.rev() in (node.nullrev, b.rev()):

259

return {}

227

return {}

260

228

261

repo = a.repo().unfiltered()

229

repo = a.repo().unfiltered()

262

children = {}

230

children = {}

263

231

264

cl = repo.changelog

232

cl = repo.changelog

265

isancestor = cl.isancestorrev # XXX we should had chaching to this.

233

isancestor = cl.isancestorrev # XXX we should had chaching to this.

266

missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])

234

missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])

267

mrset = set(missingrevs)

235

mrset = set(missingrevs)

268

roots = set()

236

roots = set()

269

for r in missingrevs:

237

for r in missingrevs:

270

for p in cl.parentrevs(r):

238

for p in cl.parentrevs(r):

271

if p == node.nullrev:

239

if p == node.nullrev:

272

continue

240

continue

273

if p not in children:

241

if p not in children:

274

children[p] = [r]

242

children[p] = [r]

275

else:

243

else:

276

children[p].append(r)

244

children[p].append(r)

277

if p not in mrset:

245

if p not in mrset:

278

roots.add(p)

246

roots.add(p)

279

if not roots:

247

if not roots:

280

# no common revision to track copies from

248

# no common revision to track copies from

281

return {}

249

return {}

282

min_root = min(roots)

250

min_root = min(roots)

283

251

284

from_head = set(

252

from_head = set(

285

cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)

253

cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)

286

)

254

)

287

255

288

iterrevs = set(from_head)

256

iterrevs = set(from_head)

289

iterrevs &= mrset

257

iterrevs &= mrset

290

iterrevs.update(roots)

258

iterrevs.update(roots)

291

iterrevs.remove(b.rev())

259

iterrevs.remove(b.rev())

292

revs = sorted(iterrevs)

260

revs = sorted(iterrevs)

293

261

294

if repo.filecopiesmode == b'changeset-sidedata':

262

if repo.filecopiesmode == b'changeset-sidedata':

295

revinfo = _revinfo_getter(repo)

263

revinfo = _revinfo_getter(repo)

296

return _combine_changeset_copies(

264

return _combine_changeset_copies(

297

revs, children, b.rev(), revinfo, match, isancestor

265

revs, children, b.rev(), revinfo, match, isancestor

298

)

266

)

299

else:

267

else:

300

revinfo = _revinfo_getter_extra(repo)

268

revinfo = _revinfo_getter_extra(repo)

301

return _combine_changeset_copies_extra(

269

return _combine_changeset_copies_extra(

302

revs, children, b.rev(), revinfo, match, isancestor

270

revs, children, b.rev(), revinfo, match, isancestor

303

)

271

)

304

272

305

273

306

def _combine_changeset_copies(

274

def _combine_changeset_copies(

307

revs, children, targetrev, revinfo, match, isancestor

275

revs, children, targetrev, revinfo, match, isancestor

308

):

276

):

309

"""combine the copies information for each item of iterrevs

277

"""combine the copies information for each item of iterrevs

310

278

311

revs: sorted iterable of revisions to visit

279

revs: sorted iterable of revisions to visit

312

children: a {parent: [children]} mapping.

280

children: a {parent: [children]} mapping.

313

targetrev: the final copies destination revision (not in iterrevs)

281

targetrev: the final copies destination revision (not in iterrevs)

314

revinfo(rev): a function that returns (p1, p2, p1copies, p2copies, removed, ismerged)

282

revinfo(rev): a function that returns (p1, p2, changes)

315

match: a matcher

283

match: a matcher

316

284

317

It returns the aggregated copies information for `targetrev`.

285

It returns the aggregated copies information for `targetrev`.

318

"""

286

"""

319

all_copies = {}

287

all_copies = {}

320

alwaysmatch = match.always()

288

alwaysmatch = match.always()

321

for r in revs:

289

for r in revs:

322

copies = all_copies.pop(r, None)

290

copies = all_copies.pop(r, None)

323

if copies is None:

291

if copies is None:

324

# this is a root

292

# this is a root

325

copies = {}

293

copies = {}

326

for i, c in enumerate(children[r]):

294

for i, c in enumerate(children[r]):

327

p1, p2, ~~p1copies~~, ~~p2copies~~, ~~removed~~, ~~ismerged~~ = revinfo(c)

295

p1, p2, changes = revinfo(c)

328

if r == p1:

296

if r == p1:

329

parent = 1

297

parent = 1

330

childcopies = ~~p1copies~~

298

childcopies = changes.copied_from_p1

331

else:

299

else:

332

assert r == p2

300

assert r == p2

333

parent = 2

301

parent = 2

334

childcopies = ~~p2copies~~

302

childcopies = changes.copied_from_p2

335

if not alwaysmatch:

303

if not alwaysmatch:

336

childcopies = {

304

childcopies = {

337

dst: src for dst, src in childcopies.items() if match(dst)

305

dst: src for dst, src in childcopies.items() if match(dst)

338

}

306

}

339

newcopies = copies

307

newcopies = copies

340

if childcopies:

308

if childcopies:

341

newcopies = copies.copy()

309

newcopies = copies.copy()

342

for dest, source in pycompat.iteritems(childcopies):

310

for dest, source in pycompat.iteritems(childcopies):

343

prev = copies.get(source)

311

prev = copies.get(source)

344

if prev is not None and prev[1] is not None:

312

if prev is not None and prev[1] is not None:

345

source = prev[1]

313

source = prev[1]

346

newcopies[dest] = (c, source)

314

newcopies[dest] = (c, source)

347

assert newcopies is not copies

315

assert newcopies is not copies

348

for f in removed:

316

for f in changes.removed:

349

if f in newcopies:

317

if f in newcopies:

350

if newcopies is copies:

318

if newcopies is copies:

351

# copy on write to avoid affecting potential other

319

# copy on write to avoid affecting potential other

352

# branches. when there are no other branches, this

320

# branches. when there are no other branches, this

353

# could be avoided.

321

# could be avoided.

354

newcopies = copies.copy()

322

newcopies = copies.copy()

355

newcopies[f] = (c, None)

323

newcopies[f] = (c, None)

356

othercopies = all_copies.get(c)

324

othercopies = all_copies.get(c)

357

if othercopies is None:

325

if othercopies is None:

358

all_copies[c] = newcopies

326

all_copies[c] = newcopies

359

else:

327

else:

360

# we are the second parent to work on c, we need to merge our

328

# we are the second parent to work on c, we need to merge our

361

# work with the other.

329

# work with the other.

362

#

330

#

363

# In case of conflict, parent 1 takes precedence over parent 2.

331

# In case of conflict, parent 1 takes precedence over parent 2.

364

# This is an arbitrary choice made anew when implementing

332

# This is an arbitrary choice made anew when implementing

365

# changeset based copies. It was made without regard to

333

# changeset based copies. It was made without regard to

366

# potential filelog related behavior.

334

# potential filelog related behavior.

367

if parent == 1:

335

if parent == 1:

368

_merge_copies_dict(

336

_merge_copies_dict(

369

othercopies, newcopies, isancestor, ~~ismerged~~

337

othercopies, newcopies, isancestor, changes

370

)

338

)

371

else:

339

else:

372

_merge_copies_dict(

340

_merge_copies_dict(

373

newcopies, othercopies, isancestor, ~~ismerged~~

341

newcopies, othercopies, isancestor, changes

374

)

342

)

375

all_copies[c] = newcopies

343

all_copies[c] = newcopies

376

344

377

final_copies = {}

345

final_copies = {}

378

for dest, (tt, source) in all_copies[targetrev].items():

346

for dest, (tt, source) in all_copies[targetrev].items():

379

if source is not None:

347

if source is not None:

380

final_copies[dest] = source

348

final_copies[dest] = source

381

return final_copies

349

return final_copies

382

350

383

351

384

def _merge_copies_dict(minor, major, isancestor, ~~ismerged~~):

352

def _merge_copies_dict(minor, major, isancestor, changes):

385

"""merge two copies-mapping together, minor and major

353

"""merge two copies-mapping together, minor and major

386

354

387

In case of conflict, value from "major" will be picked.

355

In case of conflict, value from "major" will be picked.

388

356

389

- `isancestor(low_rev, high_rev)`: callable returning True if `low_rev` is an

357

- `isancestor(low_rev, high_rev)`: callable returning True if `low_rev` is an

390

ancestor of `high_rev`,

358

ancestor of `high_rev`,

391

359

392

- `ismerged(path)`: callable returning True if `path` has been merged in the

360

- `changes`: object whose `merged` attribute holds the paths merged in the

393

current revision,

361

current revision,

394

"""

362

"""

395

for dest, value in major.items():

363

for dest, value in major.items():

396

other = minor.get(dest)

364

other = minor.get(dest)

397

if other is None:

365

if other is None:

398

minor[dest] = value

366

minor[dest] = value

399

else:

367

else:

400

new_tt = value[0]

368

new_tt = value[0]

401

other_tt = other[0]

369

other_tt = other[0]

402

if value[1] == other[1]:

370

if value[1] == other[1]:

403

continue

371

continue

404

# content from "major" wins, unless it is older

372

# content from "major" wins, unless it is older

405

# than the branch point or there is a merge

373

# than the branch point or there is a merge

406

if (

374

if (

407

new_tt == other_tt

375

new_tt == other_tt

408

or not isancestor(new_tt, other_tt)

376

or not isancestor(new_tt, other_tt)

409

or ~~ismerged~~(dest)

377

or dest in changes.merged

410

):

378

):

411

minor[dest] = value

379

minor[dest] = value

412

380

413

381

414

def _revinfo_getter_extra(repo):

382

def _revinfo_getter_extra(repo):

415

"""return a function that return multiple data given a <rev>"i

383

"""return a function that return multiple data given a <rev>"i

416

384

417

* p1: revision number of first parent

385

* p1: revision number of first parent

418

* p2: revision number of second parent

386

* p2: revision number of second parent

419

* p1copies: mapping of copies from p1

387

* p1copies: mapping of copies from p1

420

* p2copies: mapping of copies from p2

388

* p2copies: mapping of copies from p2

421

* removed: a list of removed files

389

* removed: a list of removed files

422

* ismerged: a callback to know if file was merged in that revision

390

* ismerged: a callback to know if file was merged in that revision

423

"""

391

"""

424

cl = repo.changelog

392

cl = repo.changelog

425

parents = cl.parentrevs

393

parents = cl.parentrevs

426

394

427

def get_ismerged(rev):

395

def get_ismerged(rev):

428

ctx = repo[rev]

396

ctx = repo[rev]

429

397

430

def ismerged(path):

398

def ismerged(path):

431

if path not in ctx.files():

399

if path not in ctx.files():

432

return False

400

return False

433

fctx = ctx[path]

401

fctx = ctx[path]

434

parents = fctx._filelog.parents(fctx._filenode)

402

parents = fctx._filelog.parents(fctx._filenode)

435

nb_parents = 0

403

nb_parents = 0

436

for n in parents:

404

for n in parents:

437

if n != node.nullid:

405

if n != node.nullid:

438

nb_parents += 1

406

nb_parents += 1

439

return nb_parents >= 2

407

return nb_parents >= 2

440

408

441

return ismerged

409

return ismerged

442

410

443

def revinfo(rev):

411

def revinfo(rev):

444

p1, p2 = parents(rev)

412

p1, p2 = parents(rev)

445

ctx = repo[rev]

413

ctx = repo[rev]

446

p1copies, p2copies = ctx._copies

414

p1copies, p2copies = ctx._copies

447

removed = ctx.filesremoved()

415

removed = ctx.filesremoved()

448

return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

416

return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

449

417

450

return revinfo

418

return revinfo

451

419

452

420

453

def _combine_changeset_copies_extra(

421

def _combine_changeset_copies_extra(

454

revs, children, targetrev, revinfo, match, isancestor

422

revs, children, targetrev, revinfo, match, isancestor

455

):

423

):

456

"""version of `_combine_changeset_copies` that works with the Google

424

"""version of `_combine_changeset_copies` that works with the Google

457

specific "extra" based storage for copy information"""

425

specific "extra" based storage for copy information"""

458

all_copies = {}

426

all_copies = {}

459

alwaysmatch = match.always()

427

alwaysmatch = match.always()

460

for r in revs:

428

for r in revs:

461

copies = all_copies.pop(r, None)

429

copies = all_copies.pop(r, None)

462

if copies is None:

430

if copies is None:

463

# this is a root

431

# this is a root

464

copies = {}

432

copies = {}

465

for i, c in enumerate(children[r]):

433

for i, c in enumerate(children[r]):

466

p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)

434

p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)

467

if r == p1:

435

if r == p1:

468

parent = 1

436

parent = 1

469

childcopies = p1copies

437

childcopies = p1copies

470

else:

438

else:

471

assert r == p2

439

assert r == p2

472

parent = 2

440

parent = 2

473

childcopies = p2copies

441

childcopies = p2copies

474

if not alwaysmatch:

442

if not alwaysmatch:

475

childcopies = {

443

childcopies = {

476

dst: src for dst, src in childcopies.items() if match(dst)

444

dst: src for dst, src in childcopies.items() if match(dst)

477

}

445

}

478

newcopies = copies

446

newcopies = copies

479

if childcopies:

447

if childcopies:

480

newcopies = copies.copy()

448

newcopies = copies.copy()

481

for dest, source in pycompat.iteritems(childcopies):

449

for dest, source in pycompat.iteritems(childcopies):

482

prev = copies.get(source)

450

prev = copies.get(source)

483

if prev is not None and prev[1] is not None:

451

if prev is not None and prev[1] is not None:

484

source = prev[1]

452

source = prev[1]

485

newcopies[dest] = (c, source)

453

newcopies[dest] = (c, source)

486

assert newcopies is not copies

454

assert newcopies is not copies

487

for f in removed:

455

for f in removed:

488

if f in newcopies:

456

if f in newcopies:

489

if newcopies is copies:

457

if newcopies is copies:

490

# copy on write to avoid affecting potential other

458

# copy on write to avoid affecting potential other

491

# branches. when there are no other branches, this

459

# branches. when there are no other branches, this

492

# could be avoided.

460

# could be avoided.

493

newcopies = copies.copy()

461

newcopies = copies.copy()

494

newcopies[f] = (c, None)

462

newcopies[f] = (c, None)

495

othercopies = all_copies.get(c)

463

othercopies = all_copies.get(c)

496

if othercopies is None:

464

if othercopies is None:

497

all_copies[c] = newcopies

465

all_copies[c] = newcopies

498

else:

466

else:

499

# we are the second parent to work on c, we need to merge our

467

# we are the second parent to work on c, we need to merge our

500

# work with the other.

468

# work with the other.

501

#

469

#

502

# In case of conflict, parent 1 takes precedence over parent 2.

470

# In case of conflict, parent 1 takes precedence over parent 2.

503

# This is an arbitrary choice made anew when implementing

471

# This is an arbitrary choice made anew when implementing

504

# changeset based copies. It was made without regard to

472

# changeset based copies. It was made without regard to

505

# potential filelog related behavior.

473

# potential filelog related behavior.

506

if parent == 1:

474

if parent == 1:

507

_merge_copies_dict_extra(

475

_merge_copies_dict_extra(

508

othercopies, newcopies, isancestor, ismerged

476

othercopies, newcopies, isancestor, ismerged

509

)

477

)

510

else:

478

else:

511

_merge_copies_dict_extra(

479

_merge_copies_dict_extra(

512

newcopies, othercopies, isancestor, ismerged

480

newcopies, othercopies, isancestor, ismerged

513

)

481

)

514

all_copies[c] = newcopies

482

all_copies[c] = newcopies

515

483

516

final_copies = {}

484

final_copies = {}

517

for dest, (tt, source) in all_copies[targetrev].items():

485

for dest, (tt, source) in all_copies[targetrev].items():

518

if source is not None:

486

if source is not None:

519

final_copies[dest] = source

487

final_copies[dest] = source

520

return final_copies

488

return final_copies

521

489

522

490

523

def _merge_copies_dict_extra(minor, major, isancestor, ismerged):

491

def _merge_copies_dict_extra(minor, major, isancestor, ismerged):

524

"""version of `_merge_copies_dict` that works with the Google

492

"""version of `_merge_copies_dict` that works with the Google

525

specific "extra" based storage for copy information"""

493

specific "extra" based storage for copy information"""

526

for dest, value in major.items():

494

for dest, value in major.items():

527

other = minor.get(dest)

495

other = minor.get(dest)

528

if other is None:

496

if other is None:

529

minor[dest] = value

497

minor[dest] = value

530

else:

498

else:

531

new_tt = value[0]

499

new_tt = value[0]

532

other_tt = other[0]

500

other_tt = other[0]

533

if value[1] == other[1]:

501

if value[1] == other[1]:

534

continue

502

continue

535

# content from "major" wins, unless it is older

503

# content from "major" wins, unless it is older

536

# than the branch point or there is a merge

504

# than the branch point or there is a merge

537

if (

505

if (

538

new_tt == other_tt

506

new_tt == other_tt

539

or not isancestor(new_tt, other_tt)

507

or not isancestor(new_tt, other_tt)

540

or ismerged(dest)

508

or ismerged(dest)

541

):

509

):

542

minor[dest] = value

510

minor[dest] = value

543

511

544

512

545

def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b

    `base` defaults to `a` when not given.  `match` is first passed through
    the repository's `narrowmatch()` and the resulting matcher is the one
    handed to the helpers below.

    When `b.rev()` is None (the working copy), the committed copies up to
    `b.p1()` are chained with the copies recorded in the dirstate.
    """
    base = a if base is None else base
    match = a.repo().narrowmatch(match)

    if b.rev() is not None:
        # plain committed revision: nothing to add from the dirstate
        return _committedforwardcopies(a, b, base, match)

    # working copy: combine committed copies with the dirstate ones
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))

559

527

560

528

561

def _backwardrenames(a, b, match):
    """Return a {src: dst} mapping built by reversing `_forwardcopies(b, a)`.

    An empty mapping is returned when `experimental.copytrace` is set to
    `off`.  Entries whose source is still present in `a` are plain copies,
    not renames, and are dropped.  When `match` is truthy, only entries whose
    source it accepts are kept.
    """
    copytrace = a._repo.ui.config(b'experimental', b'copytrace')
    if copytrace == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    renames = {}
    for dst, src in sorted(pycompat.iteritems(forward)):
        # keep the entry only if the matcher (when any) accepts the source
        # and the source no longer exists in `a` (otherwise it is a copy)
        if (not match or match(src)) and src not in a:
            renames[src] = dst
    return renames

581

549

582

550

583

def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    An empty mapping is returned when `x == y` or when either context is
    falsy.  When `y` is the working copy (`y.rev()` is None) and `x` is its
    first parent, the dirstate copies are returned directly.

    Otherwise the search mode depends on `y.ancestor(x)`:

    - the ancestor is `x`: forward copies from `x` to `y`;
    - the ancestor is `y`: backward renames from `x` to `y`;
    - anything else: backward renames from `x` to the ancestor chained with
      forward copies from the ancestor to `y`.

    In these three cases `_filter(x, y, ...)` is applied to the mapping
    before it is returned.  With `devel.debug.copies` and the debug flag
    set, the selected mode is written to the debug output.
    """
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )

    if x == y or not x or not y:
        return {}

    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)

    ancestor = y.ancestor(x)
    if ancestor == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        result = _forwardcopies(x, y, match=match)
    elif ancestor == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        result = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        # only pass a base along when the common ancestor is a real revision
        base = x if ancestor.rev() != node.nullrev else None
        backward = _backwardrenames(x, ancestor, match=match)
        forward = _forwardcopies(ancestor, y, base, match=match)
        result = _chain(backward, forward)

    # return value ignored: presumably `_filter` prunes `result` in place
    _filter(x, y, result)
    return result

619

587

620

588

621

def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, a.txt no longer exists in
    revision 4; when the user has copytrace disabled, the following message
    is printed:

        ```other changed <file> which local deleted```

    Returns a 3-tuple:

    - a `branch_copies` instance for the c1 side,
    - a `branch_copies` instance for the c2 side,
    - "diverge", a mapping of source name -> list of destination names
      for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        dirstate_copies = branch_copies(_dirstatecopies(repo, narrowmatch))
        return dirstate_copies, branch_copies(), {}

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it cannot parse the value,
        # so only an explicit "false" value turns copytracing off; anything
        # unparseable leaves it on.
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Do full copytracing if only non-public revisions are involved as
    # that will be fast enough and will also cover the copies which could
    # be missed by heuristics
    if copytracing == b'heuristics' and not _isfullcopytraceable(
        repo, c1, base
    ):
        return _heuristicscopytracing(repo, c1, c2, base)
    return _fullcopytracing(repo, c1, c2, base)

689

657

690

658

691

def _isfullcopytraceable(repo, c1, base):
    """Tell whether the full copytrace algorithm is affordable here.

    When `c1.rev()` is None, the first parent of `c1` is examined instead.
    The answer is False unless both that context and `base` are mutable.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from base to c1: the full copytracing algorithm is
    only considered usable when the number of changesets in `base::c1` is
    strictly below that limit.
    """
    source = c1.p1() if c1.rev() is None else c1
    if not (source.mutable() and base.mutable()):
        return False

    limit = repo.ui.configint(b'experimental', b'copytrace.sourcecommitlimit')
    span = repo.revs(b'%d::%d', base.rev(), source.rev())
    return len(span) < limit

709

677

710

678

711

def _checksinglesidecopies(

679

def _checksinglesidecopies(

712

src, dsts1, m1, m2, mb, c2, base, copy, renamedelete

680

src, dsts1, m1, m2, mb, c2, base, copy, renamedelete

713

):

681

):

714

if src not in m2:

682

if src not in m2:

715

# deleted on side 2

683

# deleted on side 2

716

if src not in m1:

684

if src not in m1:

717

# renamed on side 1, deleted on side 2

685

# renamed on side 1, deleted on side 2

718

renamedelete[src] = dsts1

686

renamedelete[src] = dsts1

719

elif src not in mb:

687

elif src not in mb:

720

# Work around the "short-circuit to avoid issues with merge states"

688

# Work around the "short-circuit to avoid issues with merge states"

721

# thing in pathcopies(): pathcopies(x, y) can return a copy where the

689

# thing in pathcopies(): pathcopies(x, y) can return a copy where the

722

# destination doesn't exist in y.

690

# destination doesn't exist in y.

723

pass

691

pass

724

elif mb[src] != m2[src] and not _related(c2[src], base[src]):

692

elif mb[src] != m2[src] and not _related(c2[src], base[src]):

725

return

693

return

726

elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):

694

elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):

727

# modified on side 2

695

# modified on side 2

728

for dst in dsts1:

696

for dst in dsts1:

729

copy[dst] = src

697

copy[dst] = src

730

698

731

699

732

class branch_copies(object):
    """Copy information gathered for one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    Every attribute left as None at construction time gets its own fresh
    empty dict; a mapping passed in is stored as is, not copied.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        if copy is None:
            copy = {}
        if renamedelete is None:
            renamedelete = {}
        if dirmove is None:
            dirmove = {}
        if movewithdir is None:
            movewithdir = {}
        self.copy = copy
        self.renamedelete = renamedelete
        self.dirmove = dirmove
        self.movewithdir = movewithdir

    def __repr__(self):
        # one attribute per line, in a fixed order
        template = '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>'
        fields = (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
        return template % fields

764

732

765

733

766

def _fullcopytracing(repo, c1, c2, base):
    """Full copytracing between the merge base and both merge sides.

    The copies from `base` to `c1` and from `base` to `c2` are computed with
    `pathcopies()` and then classified per copy source:

    - renamed on both sides (source gone from both manifests): destinations
      shared by both sides are recorded as copies on both sides; when no
      destination is shared the rename is recorded as divergent;
    - copied on both sides (source still in both manifests): only the shared
      destinations are recorded;
    - copied/renamed on one side only: delegated to
      `_checksinglesidecopies()`.

    Directory renames are then looked up with `_dir_renames()`.

    This is pretty slow when a lot of changesets are involved but will track
    all the copies.

    Returns `(branch_copies for c1, branch_copies for c2, diverge)`, where
    "diverge" maps a source name to the sorted list of divergent
    destinations.
    """
    m1, m2, mb = c1.manifest(), c2.manifest(), base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)
    if not copies1 and not copies2:
        return branch_copies(), branch_copies(), {}

    def bysource(copies):
        # turn {dst: src} into {src: [dst, ...]}
        inverted = {}
        for dst, src in copies.items():
            inverted.setdefault(src, []).append(dst)
        return inverted

    inversecopies1 = bysource(copies1)
    inversecopies2 = bysource(copies2)

    copy1, copy2 = {}, {}
    diverge = {}
    renamedelete1, renamedelete2 = {}, {}
    for src in set(inversecopies1) | set(inversecopies2):
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)

        if not (dsts1 and dsts2):
            if dsts1:
                # copied/renamed only on side 1
                _checksinglesidecopies(
                    src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
                )
            elif dsts2:
                # copied/renamed only on side 2
                _checksinglesidecopies(
                    src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
                )
            continue

        # copied/renamed on both sides
        gone1 = src not in m1
        gone2 = src not in m2
        if gone1 != gone2:
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
            continue

        # If there's some overlap in the rename destinations, we
        # consider it not divergent. For example, if side 1 copies 'a'
        # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
        # and 'd' and deletes 'a'.
        shared = set(dsts1) & set(dsts2)
        for dst in shared:
            copy1[dst] = src
            copy2[dst] = src
        if gone1 and not shared:
            # renamed on both sides, to entirely different destinations
            diverge[src] = sorted(set(dsts1) | set(dsts2))

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    if repo.ui.debugflag:
        divergeset = {dst for dsts in diverge.values() for dst in dsts}
        renamedeleteset = {
            dst
            for renamedelete in (renamedelete1, renamedelete2)
            for dsts in renamedelete.values()
            for dst in dsts
        }

        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b" on %s side:\n" % side)
            for f in sorted(copies):
                marks = (
                    (b"*", f in copy1 or f in copy2),
                    (b"!", f in divergeset),
                    (b"%", f in renamedeleteset),
                )
                note = b"".join(mark for mark, hit in marks if hit)
                repo.ui.debug(
                    b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset, divergeset

    repo.ui.debug(b" checking for directory renames\n")

    # files added on one side may need to follow a directory moved on the
    # other side, hence the crossed u2/u1 arguments and results
    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)

    return (
        branch_copies(copy1, renamedelete1, dirmove1, movewithdir1),
        branch_copies(copy2, renamedelete2, dirmove2, movewithdir2),
        diverge,
    )

889

857

890

858

891

def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
              merge.manifestmerge() won't care about
    addedfiles: added files on the other side (compared to ctx)

    Returns a `(dirmove, movewithdir)` pair.  `dirmove` maps a source
    directory to its destination directory, both with a trailing b"/".
    `movewithdir` maps each added file located under a moved directory to
    its new location, unless that location is already a key of `copy`.
    Both are empty dicts when no directory move is detected.
    """
    # generate a directory move map
    knowndirs = ctx.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        srcdir = pathutil.dirname(src)
        dstdir = pathutil.dirname(dst)
        if srcdir in invalid:
            # already seen to be uninteresting
            continue
        # a source directory is ruled out when it wasn't entirely moved
        # locally (both directories still exist), or when files from it
        # moved to two different places
        partial = srcdir in knowndirs and dstdir in knowndirs
        if partial or dirmove.get(srcdir, dstdir) != dstdir:
            invalid.add(srcdir)
        else:
            # looks good so far
            dirmove[srcdir] = dstdir

    for srcdir in invalid:
        dirmove.pop(srcdir, None)
    del knowndirs, invalid

    if not dirmove:
        return {}, {}

    dirmove = {
        srcdir + b"/": dstdir + b"/"
        for srcdir, dstdir in pycompat.iteritems(dirmove)
    }

    for srcdir, dstdir in pycompat.iteritems(dirmove):
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (srcdir, dstdir)
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfiles:
        if f in fullcopy:
            continue
        for srcdir, dstdir in pycompat.iteritems(dirmove):
            if not f.startswith(srcdir):
                continue
            # new file added in a directory that was moved, move it
            df = dstdir + f[len(srcdir) :]
            if df not in copy:
                movewithdir[f] = df
                repo.ui.debug(
                    b" pending file src: '%s' -> dst: '%s'\n"
                    % (f, df)
                )
            # only the first matching directory move is considered
            break

    return dirmove, movewithdir

954

922

955

923

956

def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If a merge is involved it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns a 3-tuple ``(branch_copies(copies1), branch_copies(copies2), {})``
    unless it delegates to _fullcopytracing(), in which case that function's
    result is returned unchanged.
    """

    # a context without a revision number is replaced by its first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # walk first parents from c2 back to base, collecting touched files
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # copies recorded between base and c2 whose source still exists in c1
    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = [
        f for f in changedfiles if f not in m1 and f in base and f in c2
    ]

    copies1 = {}
    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        # index files that exist in c1 but not in base by basename and dirname
        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (the value does not depend on the file, so it is read only once)
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}

1063

1031

1064

1032

1065

def _related(f1, f2):

1033

def _related(f1, f2):

1066

"""return True if f1 and f2 filectx have a common ancestor

1034

"""return True if f1 and f2 filectx have a common ancestor

1067

1035

1068

Walk back to common ancestor to see if the two files originate

1036

Walk back to common ancestor to see if the two files originate

1069

from the same file. Since workingfilectx's rev() is None it messes

1037

from the same file. Since workingfilectx's rev() is None it messes

1070

up the integer comparison logic, hence the pre-step check for

1038

up the integer comparison logic, hence the pre-step check for

1071

None (f1 and f2 can only be workingfilectx's initially).

1039

None (f1 and f2 can only be workingfilectx's initially).

1072

"""

1040

"""

1073

1041

1074

if f1 == f2:

1042

if f1 == f2:

1075

return True # a match

1043

return True # a match

1076

1044

1077

g1, g2 = f1.ancestors(), f2.ancestors()

1045

g1, g2 = f1.ancestors(), f2.ancestors()

1078

try:

1046

try:

1079

f1r, f2r = f1.linkrev(), f2.linkrev()

1047

f1r, f2r = f1.linkrev(), f2.linkrev()

1080

1048

1081

if f1r is None:

1049

if f1r is None:

1082

f1 = next(g1)

1050

f1 = next(g1)

1083

if f2r is None:

1051

if f2r is None:

1084

f2 = next(g2)

1052

f2 = next(g2)

1085

1053

1086

while True:

1054

while True:

1087

f1r, f2r = f1.linkrev(), f2.linkrev()

1055

f1r, f2r = f1.linkrev(), f2.linkrev()

1088

if f1r > f2r:

1056

if f1r > f2r:

1089

f1 = next(g1)

1057

f1 = next(g1)

1090

elif f2r > f1r:

1058

elif f2r > f1r:

1091

f2 = next(g2)

1059

f2 = next(g2)

1092

else: # f1 and f2 point to files in the same linkrev

1060

else: # f1 and f2 point to files in the same linkrev

1093

return f1 == f2 # true if they point to the same file

1061

return f1 == f2 # true if they point to the same file

1094

except StopIteration:

1062

except StopIteration:

1095

return False

1063

return False

1096

1064

1097

1065

1098

def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    new_copies = pathcopies(base, ctx)
    # drop entries that are not valid between wctx's first parent and wctx
    _filter(wctx.p1(), wctx, new_copies)
    for dst, src in pycompat.iteritems(new_copies):
        wctx[dst].markcopied(src)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import collections
             import os
             from .i18n import _
             from . import (
                 match as matchmod,
                 node,
                 pathutil,
                 pycompat,
                 util,
             )
             from .utils import stringutil
             def _filter(src, dst, t):
                 """filters out invalid copies after chaining"""
                 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
                 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
                 # in the following table (not including trivial cases). For example, case 2
                 # is where a file existed in 'src' and remained under that name in 'mid' and
                 # then was renamed between 'mid' and 'dst'.
                 #
                 # case src mid dst result
                 #   1   x   y   -    -
                 #   2   x   y   y   x->y
                 #   3   x   y   x    -
                 #   4   x   y   z   x->z
                 #   5   -   x   y    -
                 #   6   x   x   y   x->y
                 #
                 # _chain() takes care of chaining the copies in 'a' and 'b', but it
                 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
                 # between 5 and 6, so it includes all cases in its result.
                 # Cases 1, 3, and 5 are then removed by _filter().
                 for k, v in list(t.items()):
                     # remove copies from files that didn't exist
                     if v not in src:
                         del t[k]
                     # remove criss-crossed copies
                     elif k in src and v in dst:
                         del t[k]
                     # remove copies to files that were then removed
                     elif k not in dst:
                         del t[k]
             def _chain(prefix, suffix):
                 """Compose the copy mappings 'prefix' and 'suffix' into a new mapping.

                 The result starts as a copy of ``prefix``.  Every ``suffix`` entry is
                 then added, with its value replaced by ``prefix``'s value for it when
                 ``prefix`` has one.  Neither argument is modified.
                 """
                 chained = prefix.copy()
                 for dst, mid in pycompat.iteritems(suffix):
                     # follow `mid` one step further back when prefix knows about it
                     chained[dst] = prefix.get(mid, mid)
                 return chained
             def _tracefile(fctx, am, basemf):
                 """return file context that is the ancestor of fctx present in ancestor
                 manifest am
                 Note: we used to try and stop after a given limit, however checking if that
                 limit is reached turned out to be very expensive. we are better off
                 disabling that feature."""
                 for f in fctx.ancestors():
                     path = f.path()
                     if am.get(path, None) == f.filenode():
                         return path
                     if basemf and basemf.get(path, None) == f.filenode():
                         return path
             def _dirstatecopies(repo, match=None):
                 ds = repo.dirstate
                 c = ds.copies().copy()
                 for k in list(c):
                     if ds[k] not in b'anm' or (match and not match(k)):
                         del c[k]
                 return c
             def _computeforwardmissing(a, b, match=None):
                 """Computes which files are in b but not a.
                 This is its own function so extensions can easily wrap this call to see what
                 files _forwardcopies is about to process.
                 """
                 ma = a.manifest()
                 mb = b.manifest()
                 return mb.filesnotin(ma, match=match)
             def usechangesetcentricalgo(repo):
                 """Tell whether the changeset-centric copy algorithms should be used.

                 True when ``repo.filecopiesmode`` is b'changeset-sidedata', or when
                 the ``experimental.copies.read-from`` config value is
                 b'changeset-only' or b'compatibility'.
                 """
                 if repo.filecopiesmode == b'changeset-sidedata':
                     return True
                 # the config is only consulted when the repository mode did not decide
                 return repo.ui.config(b'experimental', b'copies.read-from') in (
                     b'changeset-only',
                     b'compatibility',
                 )
             def _committedforwardcopies(a, b, base, match):
                 """Like _forwardcopies(), but b.rev() cannot be None (working copy)

                 Returns a dict mapping each file that is in ``b`` but not in ``a`` to
                 the path _tracefile() found for it; files for which nothing was found
                 are left out.  Delegates to _changesetforwardcopies() when the
                 changeset-centric algorithm is enabled.
                 """
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 repo = a._repo
                 if usechangesetcentricalgo(repo):
                     return _changesetforwardcopies(a, b, match)

                 ui = repo.ui
                 debug = ui.debugflag and ui.configbool(b'devel', b'debug.copies')
                 dbg = ui.debug
                 if debug:
                     dbg(b'debug.copies:    looking into rename from %s to %s\n' % (a, b))

                 am = a.manifest()
                 basemf = base.manifest() if base is not None else None

                 # find where new files came from
                 # we currently don't try to find where old files went, too expensive
                 # this means we can miss a case like 'hg rm b; hg cp a b'
                 cm = {}

                 # Comparing entire manifests is expensive on large repositories.  In
                 # the common case of a commit versus its parent (rebase, histedit)
                 # the search is narrowed to the files the commit touched.  Merge
                 # commits are excluded because their ctx.files() is not correct for
                 # this comparison.
                 if b.p1() == a and b.p2().node() == node.nullid:
                     forwardmissingmatch = matchmod.intersectmatchers(
                         match, matchmod.exact(b.files())
                     )
                 else:
                     forwardmissingmatch = match
                 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

                 ancestrycontext = repo.changelog.ancestors([b.rev()], inclusive=True)

                 if debug:
                     dbg(b'debug.copies:      missing files to search: %d\n' % len(missing))

                 for fname in sorted(missing):
                     if debug:
                         dbg(b'debug.copies:        tracing file: %s\n' % fname)
                     fctx = b[fname]
                     fctx._ancestrycontext = ancestrycontext

                     if debug:
                         start = util.timer()
                     origin = _tracefile(fctx, am, basemf)
                     if origin:
                         if debug:
                             dbg(b'debug.copies:          rename of: %s\n' % origin)
                         cm[fname] = origin
                     if debug:
                         elapsed = util.timer() - start
                         dbg(b'debug.copies:          time: %f seconds\n' % elapsed)
                 return cm
             def _revinfo_getter(repo):
                 """Return a function that returns the following data given a <rev>:

                 * p1: revision number of first parent
                 * p2: revision number of second parent
                 * changes: a ChangingFiles object

                 The data is returned as the tuple ``(p1, p2, changes)``.
                 """
                 cl = repo.changelog
                 parents = cl.parentrevs
                 changelogrevision = cl.changelogrevision

                 # A small cache to avoid doing the work twice for merges
                 #
                 # In the vast majority of cases, if we ask information for a revision
                 # about 1 parent, we'll later ask it for the other. So it makes sense
                 # to keep the information around when reaching the first parent of a
                 # merge and to drop it after it was provided for the second parent.
                 #
                 # There are cases where only one parent of the merge will be walked.
                 # It happens when the "destination" of the copy tracing is descendant
                 # from a new root, not common with the "source". In that case, we will
                 # only walk through merge parents that are descendants of changesets
                 # common between "source" and "destination".
                 #
                 # With the current implementation, if such changesets have copy
                 # information, we'll keep them in memory until the end of
                 # _changesetforwardcopies. We don't expect the case to be frequent
                 # enough to matter.
                 #
                 # In addition, it would be possible to reach a pathological case,
                 # where many first parents are met before any second parent is
                 # reached. In that case the cache could grow. If this ever becomes an
                 # issue one can safely introduce a maximum cache size. This would
                 # trade extra CPU/IO time to save memory.
                 merge_caches = {}

                 def revinfo(rev):
                     p1, p2 = parents(rev)
                     # a cached entry is handed out once and dropped from the cache
                     e = merge_caches.pop(rev, None)
                     if e is not None:
                         return e
                     value = (p1, p2, changelogrevision(rev).changes)
                     if p1 != node.nullrev and p2 != node.nullrev:
                         # XXX some case we over cache, IGNORE
                         merge_caches[rev] = value
                     return value

                 return revinfo
             def _changesetforwardcopies(a, b, match):
                 """Compute the copies from ``a`` to ``b`` using changeset-level data.

                 Returns an empty dict when ``a`` is the null revision or the same
                 revision as ``b``, or when no root revision is found to start from.
                 Otherwise the revisions to visit are collected and handed to
                 _combine_changeset_copies() (b'changeset-sidedata' repositories) or
                 _combine_changeset_copies_extra() (everything else).
                 """
                 if a.rev() in (node.nullrev, b.rev()):
                     return {}

                 repo = a.repo().unfiltered()
                 cl = repo.changelog
                 isancestor = cl.isancestorrev  # XXX we should add caching to this.
                 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
                 mrset = set(missingrevs)

                 # build the parent -> children mapping for the missing revisions and
                 # record as roots the parents that are not themselves missing
                 children = {}
                 roots = set()
                 for rev in missingrevs:
                     for parent in cl.parentrevs(rev):
                         if parent == node.nullrev:
                             continue
                         children.setdefault(parent, []).append(rev)
                         if parent not in mrset:
                             roots.add(parent)
                 if not roots:
                     # no common revision to track copies from
                     return {}

                 from_head = set(
                     cl.reachableroots(
                         min(roots), [b.rev()], list(roots), includepath=True
                     )
                 )
                 iterrevs = (from_head & mrset) | roots
                 iterrevs.remove(b.rev())
                 revs = sorted(iterrevs)

                 if repo.filecopiesmode == b'changeset-sidedata':
                     revinfo = _revinfo_getter(repo)
                     combine = _combine_changeset_copies
                 else:
                     revinfo = _revinfo_getter_extra(repo)
                     combine = _combine_changeset_copies_extra
                 return combine(revs, children, b.rev(), revinfo, match, isancestor)
             def _combine_changeset_copies(
                 revs, children, targetrev, revinfo, match, isancestor
             ):
                 """combine the copies information for each item of iterrevs

                 revs: sorted iterable of revision to visit
                 children: a {parent: [children]} mapping.
                 targetrev: the final copies destination revision (not in iterrevs)
                 revinfo(rev): a function that returns (p1, p2, changes); this code
                               reads ``changes.copied_from_p1``,
                               ``changes.copied_from_p2`` and ``changes.removed``
                 match: a matcher
                 isancestor: passed through to _merge_copies_dict()

                 It returns the aggregated copies information for `targetrev`.
                 """
                 # {rev: {dest: (rev that set the entry, source or None)}}
                 all_copies = {}
                 alwaysmatch = match.always()
                 for r in revs:
                     copies = all_copies.pop(r, None)
                     if copies is None:
                         # this is a root
                         copies = {}
                     for c in children[r]:
                         p1, p2, changes = revinfo(c)
                         if r == p1:
                             parent = 1
                             childcopies = changes.copied_from_p1
                         else:
                             assert r == p2
                             parent = 2
                             childcopies = changes.copied_from_p2
                         if not alwaysmatch:
                             childcopies = {
                                 dst: src for dst, src in childcopies.items() if match(dst)
                             }
                         newcopies = copies
                         if childcopies:
                             newcopies = copies.copy()
                             for dest, source in pycompat.iteritems(childcopies):
                                 prev = copies.get(source)
                                 # chain through an earlier copy of the source
                                 if prev is not None and prev[1] is not None:
                                     source = prev[1]
                                 newcopies[dest] = (c, source)
                             assert newcopies is not copies
                         for f in changes.removed:
                             if f in newcopies:
                                 if newcopies is copies:
                                     # copy on write to avoid affecting potential other
                                     # branches.  when there are no other branches, this
                                     # could be avoided.
                                     newcopies = copies.copy()
                                 # a None source marks the entry as removed in `c`
                                 newcopies[f] = (c, None)
                         othercopies = all_copies.get(c)
                         if othercopies is None:
                             all_copies[c] = newcopies
                         else:
                             # we are the second parent to work on c, we need to merge our
                             # work with the other.
                             #
                             # In case of conflict, parent 1 take precedence over parent 2.
                             # This is an arbitrary choice made anew when implementing
                             # changeset based copies. It was made without regards with
                             # potential filelog related behavior.
                             if parent == 1:
                                 _merge_copies_dict(
                                     othercopies, newcopies, isancestor, changes
                                 )
                             else:
                                 _merge_copies_dict(
                                     newcopies, othercopies, isancestor, changes
                                 )
                                 all_copies[c] = newcopies

                 # entries whose source is None are dropped from the final result
                 final_copies = {}
                 for dest, (tt, source) in all_copies[targetrev].items():
                     if source is not None:
                         final_copies[dest] = source
                 return final_copies
-            def _merge_copies_dict(minor, major, isancestor, ismerged):
+            def _merge_copies_dict(minor, major, isancestor, changes):
                 """merge two copies-mapping together, minor and major
                 In case of conflict, value from "major" will be picked.
                 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
                                                     ancestors of `high_rev`,
                 - `ismerged(path)`: callable return True if `path` have been merged in the
                                     current revision,
                 """
                 for dest, value in major.items():
                     other = minor.get(dest)
                     if other is None:
                         minor[dest] = value
                     else:
                         new_tt = value[0]
                         other_tt = other[0]
                         if value[1] == other[1]:
                             continue
                         # content from "major" wins, unless it is older
                         # than the branch point or there is a merge
                         if (
                             new_tt == other_tt
                             or not isancestor(new_tt, other_tt)
-                            or ismerged(dest)
+                            or dest in changes.merged
                         ):
                             minor[dest] = value
             def _revinfo_getter_extra(repo):
                 """return a function that return multiple data given a <rev>"i
                 * p1: revision number of first parent
                 * p2: revision number of first parent
                 * p1copies: mapping of copies from p1
                 * p2copies: mapping of copies from p2
                 * removed: a list of removed files
                 * ismerged: a callback to know if file was merged in that revision
                 """
                 cl = repo.changelog
                 parents = cl.parentrevs
                 def get_ismerged(rev):
                     ctx = repo[rev]
                     def ismerged(path):
                         if path not in ctx.files():
                             return False
                         fctx = ctx[path]
                         parents = fctx._filelog.parents(fctx._filenode)
                         nb_parents = 0
                         for n in parents:
                             if n != node.nullid:
                                 nb_parents += 1
                         return nb_parents >= 2
                     return ismerged
                 def revinfo(rev):
                     p1, p2 = parents(rev)
                     ctx = repo[rev]
                     p1copies, p2copies = ctx._copies
                     removed = ctx.filesremoved()
                     return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
                 return revinfo
             def _combine_changeset_copies_extra(
                 revs, children, targetrev, revinfo, match, isancestor
             ):
                 """version of `_combine_changeset_copies` that works with the Google
                 specific "extra" based storage for copy information
                 Walks `revs` in the given order and, for each revision, pushes the
                 copies accumulated so far to every child listed in `children[rev]`.
                 - `revs`: iterable of revisions to process,
                 - `children`: mapping of a revision to its children,
                 - `targetrev`: revision whose accumulated copies are returned,
                 - `revinfo(rev)`: callable returning
                   `(p1, p2, p1copies, p2copies, removed, ismerged)`,
                 - `match`: matcher used to filter copy destinations,
                 - `isancestor`: callable forwarded to `_merge_copies_dict_extra`.
                 Returns a `{dest: source}` dict built from the entries recorded for
                 `targetrev` whose source is not None.
                 """
                 # rev -> {dest: (rev that recorded the entry, source or None)}
                 all_copies = {}
                 alwaysmatch = match.always()
                 for r in revs:
                     copies = all_copies.pop(r, None)
                     if copies is None:
                         # this is a root
                         copies = {}
                     for i, c in enumerate(children[r]):
                         p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
                         # pick the copies recorded by `c` relative to `r`
                         if r == p1:
                             parent = 1
                             childcopies = p1copies
                         else:
                             assert r == p2
                             parent = 2
                             childcopies = p2copies
                         if not alwaysmatch:
                             childcopies = {
                                 dst: src for dst, src in childcopies.items() if match(dst)
                             }
                         newcopies = copies
                         if childcopies:
                             newcopies = copies.copy()
                             for dest, source in pycompat.iteritems(childcopies):
                                 # chain with a copy already known for the source
                                 prev = copies.get(source)
                                 if prev is not None and prev[1] is not None:
                                     source = prev[1]
                                 newcopies[dest] = (c, source)
                             assert newcopies is not copies
                         for f in removed:
                             if f in newcopies:
                                 if newcopies is copies:
                                     # copy on write to avoid affecting potential other
                                     # branches.  when there are no other branches, this
                                     # could be avoided.
                                     newcopies = copies.copy()
                                 # a None source records the removal
                                 newcopies[f] = (c, None)
                         othercopies = all_copies.get(c)
                         if othercopies is None:
                             all_copies[c] = newcopies
                         else:
                             # we are the second parent to work on c, we need to merge our
                             # work with the other.
                             #
                             # In case of conflict, parent 1 take precedence over parent 2.
                             # This is an arbitrary choice made anew when implementing
                             # changeset based copies. It was made without regards with
                             # potential filelog related behavior.
                             #
                             # NOTE(review): the dict passed first is updated in place.
                             # `newcopies` can still be the very same object as `copies`
                             # at this point (no child copies, no matching removal) --
                             # confirm that this sharing is intended.
                             if parent == 1:
                                 _merge_copies_dict_extra(
                                     othercopies, newcopies, isancestor, ismerged
                                 )
                             else:
                                 _merge_copies_dict_extra(
                                     newcopies, othercopies, isancestor, ismerged
                                 )
                                 all_copies[c] = newcopies
                 final_copies = {}
                 for dest, (tt, source) in all_copies[targetrev].items():
                     # entries with a None source are removals, not copies
                     if source is not None:
                         final_copies[dest] = source
                 return final_copies
             def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
                 """version of `_merge_copies_dict` that works with the Google
                 specific "extra" based storage for copy information"""
                 for dest, value in major.items():
                     other = minor.get(dest)
                     if other is None:
                         minor[dest] = value
                     else:
                         new_tt = value[0]
                         other_tt = other[0]
                         if value[1] == other[1]:
                             continue
                         # content from "major" wins, unless it is older
                         # than the branch point or there is a merge
                         if (
                             new_tt == other_tt
                             or not isancestor(new_tt, other_tt)
                             or ismerged(dest)
                         ):
                             minor[dest] = value
             def _forwardcopies(a, b, base=None, match=None):
                 """find {dst@b: src@a} copy mapping where a is an ancestor of b
                 `base` defaults to `a`; `match` is narrowed through
                 `a.repo().narrowmatch()` before being used.
                 """
                 if base is None:
                     base = a
                 match = a.repo().narrowmatch(match)
                 if b.rev() is not None:
                     # plain committed revision
                     return _committedforwardcopies(a, b, base, match)
                 # working copy: committed copies up to its first parent, chained
                 # with the copies recorded in the dirstate
                 committed = _committedforwardcopies(a, b.p1(), base, match)
                 return _chain(committed, _dirstatecopies(b._repo, match))
             def _backwardrenames(a, b, match):
                 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
                     return {}
                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
                 # We don't want to pass in "match" here, since that would filter
                 # the destination by it. Since we're reversing the copies, we want
                 # to filter the source instead.
                 f = _forwardcopies(b, a)
                 r = {}
                 for k, v in sorted(pycompat.iteritems(f)):
                     if match and not match(v):
                         continue
                     # remove copies
                     if v in a:
                         continue
                     r[v] = k
                 return r
             def pathcopies(x, y, match=None):
                 """find {dst@y: src@x} copy mapping for directed compare
                 Depending on how `x` and `y` relate, the mapping comes from the
                 dirstate, a forward walk, a backward walk, or a backward walk to the
                 common ancestor chained with a forward walk. The result goes through
                 `_filter()` except for the trivial and dirstate cases.
                 """
                 repo = x._repo
                 ui = repo.ui
                 debug = ui.debugflag and ui.configbool(b'devel', b'debug.copies')
                 if debug:
                     ui.debug(
                         b'debug.copies: searching copies from %s to %s\n' % (x, y)
                     )
                 if x == y or not x or not y:
                     return {}
                 if y.rev() is None and x == y.p1():
                     if debug:
                         ui.debug(b'debug.copies: search mode: dirstate\n')
                     # short-circuit to avoid issues with merge states
                     return _dirstatecopies(repo, match)
                 anc = y.ancestor(x)
                 if anc == x:
                     if debug:
                         ui.debug(b'debug.copies: search mode: forward\n')
                     copies = _forwardcopies(x, y, match=match)
                 elif anc == y:
                     if debug:
                         ui.debug(b'debug.copies: search mode: backward\n')
                     copies = _backwardrenames(x, y, match=match)
                 else:
                     if debug:
                         ui.debug(b'debug.copies: search mode: combined\n')
                     base = x if anc.rev() != node.nullrev else None
                     backward = _backwardrenames(x, anc, match=match)
                     forward = _forwardcopies(anc, y, base, match=match)
                     copies = _chain(backward, forward)
                 _filter(x, y, copies)
                 return copies
             def mergecopies(repo, c1, c2, base):
                 """
                 Finds moves and copies between context c1 and c2 that are relevant for
                 merging. 'base' will be used as the merge base.
                 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
                 files that were moved/ copied in one merge parent and modified in another.
                 For example:
                 o          ---> 4 another commit
                 |
                 |   o      ---> 3 commit that modifies a.txt
                 |  /
                 o /        ---> 2 commit that moves a.txt to b.txt
                 |/
                 o          ---> 1 merge base
                 If we try to rebase revision 3 on revision 4, since there is no a.txt in
                 revision 4, and if user have copytrace disabled, we prints the following
                 message:
                 ```other changed <file> which local deleted```
                 Returns a 3-tuple `(branch_copies1, branch_copies2, diverge)` where:
                 the first two items are instances of branch_copies, one per side,
                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.
                 This function calls different copytracing algorithms based on config.
                 """
                 # avoid silly behavior for update from empty dir
                 if not (c1 and c2) or c1 == c2:
                     return branch_copies(), branch_copies(), {}
                 narrowmatch = c1.repo().narrowmatch()
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     dirstate_side = branch_copies(_dirstatecopies(repo, narrowmatch))
                     return dirstate_side, branch_copies(), {}
                 copytracing = repo.ui.config(b'experimental', b'copytrace')
                 # stringutil.parsebool() returns None when it is unable to parse the
                 # value, so we should rely on making sure copytracing is on such cases
                 if stringutil.parsebool(copytracing) is False:
                     return branch_copies(), branch_copies(), {}
                 # Copy trace disabling is explicitly below the node == p1 logic above
                 # because the logic above is required for a simple copy to be kept across a
                 # rebase.
                 #
                 # The heuristics don't make sense when we need changeset-centric algos.
                 # Otherwise, full copytracing is still preferred if only non-public
                 # revisions are involved as that will be fast enough and will also
                 # cover the copies which could be missed by heuristics.
                 use_heuristics = (
                     not usechangesetcentricalgo(repo)
                     and copytracing == b'heuristics'
                     and not _isfullcopytraceable(repo, c1, base)
                 )
                 if use_heuristics:
                     return _heuristicscopytracing(repo, c1, c2, base)
                 return _fullcopytracing(repo, c1, c2, base)
             def _isfullcopytraceable(repo, c1, base):
                 """ Checks that if base, source and destination are all no-public branches,
                 if yes let's use the full copytrace algorithm for increased capabilities
                 since it will be fast enough.
                 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
                 number of changesets from c1 to base such that if number of changesets are
                 more than the limit, full copytracing algorithm won't be used.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c1.mutable() and base.mutable():
                     sourcecommitlimit = repo.ui.configint(
                         b'experimental', b'copytrace.sourcecommitlimit'
                     )
                     commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
                     return commits < sourcecommitlimit
                 return False
             def _checksinglesidecopies(
                 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
             ):
                 if src not in m2:
                     # deleted on side 2
                     if src not in m1:
                         # renamed on side 1, deleted on side 2
                         renamedelete[src] = dsts1
                 elif src not in mb:
                     # Work around the "short-circuit to avoid issues with merge states"
                     # thing in pathcopies(): pathcopies(x, y) can return a copy where the
                     # destination doesn't exist in y.
                     pass
                 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
                     return
                 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
                     # modified on side 2
                     for dst in dsts1:
                         copy[dst] = src
             class branch_copies(object):
                 """Copy information gathered for one side of a merge/graft.
                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.
                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.
                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.
                 "dirmove" is a mapping of detected source dir -> destination dir renames.
                 This is needed for handling changes to new files previously grafted into
                 renamed directories.
                 """

                 def __init__(
                     self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
                 ):
                     # every omitted mapping gets its own fresh dict; a mapping that
                     # is passed in (even an empty one) is stored as is
                     if copy is None:
                         copy = {}
                     if renamedelete is None:
                         renamedelete = {}
                     if dirmove is None:
                         dirmove = {}
                     if movewithdir is None:
                         movewithdir = {}
                     self.copy = copy
                     self.renamedelete = renamedelete
                     self.dirmove = dirmove
                     self.movewithdir = movewithdir

                 def __repr__(self):
                     fields = (
                         self.copy,
                         self.renamedelete,
                         self.dirmove,
                         self.movewithdir,
                     )
                     return (
                         '<branch_copies\n  copy=%r\n  renamedelete=%r\n  dirmove=%r\n  movewithdir=%r\n>'
                         % fields
                     )
             def _fullcopytracing(repo, c1, c2, base):
                 """ The full copytracing algorithm which finds all the new files that were
                 added from merge base up to the top commit and for each file it checks if
                 this file was copied from another file.
                 This is pretty slow when a lot of changesets are involved but will track all
                 the copies.
                 Returns a `(branch_copies1, branch_copies2, diverge)` tuple: one
                 `branch_copies` per side (c1 first, c2 second) and a mapping of
                 source name -> sorted list of destination names for divergent
                 renames.
                 """
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 mb = base.manifest()
                 # {dst: src} mappings from the merge base to each side
                 copies1 = pathcopies(base, c1)
                 copies2 = pathcopies(base, c2)
                 if not (copies1 or copies2):
                     # no copy on either side: nothing to report
                     return branch_copies(), branch_copies(), {}
                 # invert the mappings: source -> list of destinations, per side
                 inversecopies1 = {}
                 inversecopies2 = {}
                 for dst, src in copies1.items():
                     inversecopies1.setdefault(src, []).append(dst)
                 for dst, src in copies2.items():
                     inversecopies2.setdefault(src, []).append(dst)
                 copy1 = {}
                 copy2 = {}
                 diverge = {}
                 renamedelete1 = {}
                 renamedelete2 = {}
                 allsources = set(inversecopies1) | set(inversecopies2)
                 for src in allsources:
                     dsts1 = inversecopies1.get(src)
                     dsts2 = inversecopies2.get(src)
                     if dsts1 and dsts2:
                         # copied/renamed on both sides
                         if src not in m1 and src not in m2:
                             # renamed on both sides
                             dsts1 = set(dsts1)
                             dsts2 = set(dsts2)
                             # If there's some overlap in the rename destinations, we
                             # consider it not divergent. For example, if side 1 copies 'a'
                             # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                             # and 'd' and deletes 'a'.
                             if dsts1 & dsts2:
                                 for dst in dsts1 & dsts2:
                                     copy1[dst] = src
                                     copy2[dst] = src
                             else:
                                 diverge[src] = sorted(dsts1 | dsts2)
                         elif src in m1 and src in m2:
                             # copied on both sides
                             dsts1 = set(dsts1)
                             dsts2 = set(dsts2)
                             for dst in dsts1 & dsts2:
                                 copy1[dst] = src
                                 copy2[dst] = src
                         # TODO: Handle cases where it was renamed on one side and copied
                         # on the other side
                     elif dsts1:
                         # copied/renamed only on side 1
                         _checksinglesidecopies(
                             src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
                         )
                     elif dsts2:
                         # copied/renamed only on side 2
                         _checksinglesidecopies(
                             src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
                         )
                 # find interesting file sets from manifests
                 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
                 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
                 # files added on one side only
                 u1 = sorted(addedinm1 - addedinm2)
                 u2 = sorted(addedinm2 - addedinm1)
                 header = b"  unmatched files in %s"
                 if u1:
                     repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
                 if u2:
                     repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))
                 if repo.ui.debugflag:
                     # the sets below only serve to annotate the debug output
                     renamedeleteset = set()
                     divergeset = set()
                     for dsts in diverge.values():
                         divergeset.update(dsts)
                     for dsts in renamedelete1.values():
                         renamedeleteset.update(dsts)
                     for dsts in renamedelete2.values():
                         renamedeleteset.update(dsts)
                     repo.ui.debug(
                         b"  all copies found (* = to merge, ! = divergent, "
                         b"% = renamed and deleted):\n"
                     )
                     for side, copies in ((b"local", copies1), (b"remote", copies2)):
                         if not copies:
                             continue
                         repo.ui.debug(b"   on %s side:\n" % side)
                         for f in sorted(copies):
                             note = b""
                             if f in copy1 or f in copy2:
                                 note += b"*"
                             if f in divergeset:
                                 note += b"!"
                             if f in renamedeleteset:
                                 note += b"%"
                             repo.ui.debug(
                                 b"    src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                             )
                     del renamedeleteset
                     del divergeset
                 repo.ui.debug(b"  checking for directory renames\n")
                 # directory moves found on one side yield the files to move along
                 # on the other side, hence the crossed 1/2 suffixes
                 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
                 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)
                 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
                 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
                 return branch_copies1, branch_copies2, diverge
             def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
                 """Finds moved directories and files that should move with them.
                 ctx: the context for one of the sides
                 copy: files copied on the same side (as ctx)
                 fullcopy: files copied on the same side (as ctx), including those that
                           merge.manifestmerge() won't care about
                 addedfiles: added files on the other side (compared to ctx)
                 Returns a `(dirmove, movewithdir)` pair of dicts; both are empty when
                 no directory move is detected. Keys and values of `dirmove` end with
                 a slash.
                 """
                 # generate a directory move map
                 known_dirs = ctx.dirs()
                 rejected = set()
                 candidates = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in pycompat.iteritems(fullcopy):
                     srcdir = pathutil.dirname(src)
                     dstdir = pathutil.dirname(dst)
                     if srcdir in rejected:
                         # already seen to be uninteresting
                         continue
                     if srcdir in known_dirs and dstdir in known_dirs:
                         # directory wasn't entirely moved locally
                         rejected.add(srcdir)
                     elif candidates.get(srcdir, dstdir) != dstdir:
                         # files from the same directory moved to two different places
                         rejected.add(srcdir)
                     else:
                         # looks good so far
                         candidates[srcdir] = dstdir
                 # a directory may have been accepted before it got rejected
                 for srcdir in rejected:
                     candidates.pop(srcdir, None)
                 if not candidates:
                     return {}, {}
                 dirmove = {
                     srcdir + b"/": dstdir + b"/"
                     for srcdir, dstdir in pycompat.iteritems(candidates)
                 }
                 for srcprefix, dstprefix in pycompat.iteritems(dirmove):
                     repo.ui.debug(
                         b"   discovered dir src: '%s' -> dst: '%s'\n"
                         % (srcprefix, dstprefix)
                     )
                 movewithdir = {}
                 # check unaccounted nonoverlapping files against directory moves
                 for f in addedfiles:
                     if f in fullcopy:
                         continue
                     for srcprefix in dirmove:
                         if not f.startswith(srcprefix):
                             continue
                         # new file added in a directory that was moved, move it
                         moved = dirmove[srcprefix] + f[len(srcprefix) :]
                         if moved not in copy:
                             movewithdir[f] = moved
                             repo.ui.debug(
                                 b"   pending file src: '%s' -> dst: '%s'\n"
                                 % (f, moved)
                             )
                         # only the first matching directory move is considered
                         break
                 return dirmove, movewithdir
             def _heuristicscopytracing(repo, c1, c2, base):
                 """Fast copytracing using filename heuristics
                 Assumes that moves or renames are of following two types:
                 1) Inside a directory only (same directory name but different filenames)
                 2) Move from one directory to another
                                 (same filenames but different directory names)
                 Works only when there are no merge commits in the "source branch".
                 Source branch is commits from base up to c2 not including base.
                 If merge is involved it fallbacks to _fullcopytracing().
                 Can be used by setting the following config:
                     [experimental]
                     copytrace = heuristics
                 In some cases the copy/move candidates found by heuristics can be very large
                 in number and that will make the algorithm slow. The number of possible
                 candidates to check can be limited by using the config
                 `experimental.copytrace.movecandidateslimit` which defaults to 100.
                 Returns the same kind of tuple as _fullcopytracing(): one
                 `branch_copies` per side followed by a (here always empty) mapping of
                 divergent renames.
                 """
                 # the working copy has no revision number: use its first parent
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c2.rev() is None:
                     c2 = c2.p1()
                 changedfiles = set()
                 m1 = c1.manifest()
                 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
                     # If base is not in c2 branch, we switch to fullcopytracing
                     repo.ui.debug(
                         b"switching to full copytracing as base is not "
                         b"an ancestor of c2\n"
                     )
                     return _fullcopytracing(repo, c1, c2, base)
                 # collect the files touched between base (excluded) and c2
                 ctx = c2
                 while ctx != base:
                     if len(ctx.parents()) == 2:
                         # To keep things simple let's not handle merges
                         repo.ui.debug(b"switching to full copytracing because of merges\n")
                         return _fullcopytracing(repo, c1, c2, base)
                     changedfiles.update(ctx.files())
                     ctx = ctx.p1()
                 # copies made on the c2 side whose source still exists in c1
                 copies2 = {}
                 cp = _forwardcopies(base, c2)
                 for dst, src in pycompat.iteritems(cp):
                     if src in m1:
                         copies2[dst] = src
                 # file is missing if it isn't present in the destination, but is present in
                 # the base and present in the source.
                 # Presence in the base is important to exclude added files, presence in the
                 # source is important to exclude removed files.
                 filt = lambda f: f not in m1 and f in base and f in c2
                 missingfiles = [f for f in changedfiles if filt(f)]
                 copies1 = {}
                 if missingfiles:
                     # index the files added in c1 by basename and by directory name
                     basenametofilename = collections.defaultdict(list)
                     dirnametofilename = collections.defaultdict(list)
                     for f in m1.filesnotin(base.manifest()):
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         basenametofilename[basename].append(f)
                         dirnametofilename[dirname].append(f)
                     # we can have a lot of candidates which can slow down the heuristics
                     # config value to limit the number of candidates moves to check
                     # (the value does not depend on the file, so read it only once)
                     maxcandidates = repo.ui.configint(
                         b'experimental', b'copytrace.movecandidateslimit'
                     )
                     for f in missingfiles:
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         samebasename = basenametofilename[basename]
                         samedirname = dirnametofilename[dirname]
                         movecandidates = samebasename + samedirname
                         # f is guaranteed to be present in c2, that's why
                         # c2.filectx(f) won't fail
                         f2 = c2.filectx(f)
                         if len(movecandidates) > maxcandidates:
                             repo.ui.status(
                                 _(
                                     b"skipping copytracing for '%s', more "
                                     b"candidates than the limit: %d\n"
                                 )
                                 % (f, len(movecandidates))
                             )
                             continue
                         for candidate in movecandidates:
                             f1 = c1.filectx(candidate)
                             if _related(f1, f2):
                                 # if there are a few related copies then we'll merge
                                 # changes into all of them. This matches the behaviour
                                 # of upstream copytracing
                                 copies1[candidate] = f
                 return branch_copies(copies1), branch_copies(copies2), {}
             def _related(f1, f2):
                 """return True if f1 and f2 filectx have a common ancestor
                 Walk back to common ancestor to see if the two files originate
                 from the same file. Since workingfilectx's rev() is None it messes
                 up the integer comparison logic, hence the pre-step check for
                 None (f1 and f2 can only be workingfilectx's initially).
                 """
                 if f1 == f2:
                     return True  # a match
                 g1, g2 = f1.ancestors(), f2.ancestors()
                 try:
                     f1r, f2r = f1.linkrev(), f2.linkrev()
                     if f1r is None:
                         f1 = next(g1)
                     if f2r is None:
                         f2 = next(g2)
                     while True:
                         f1r, f2r = f1.linkrev(), f2.linkrev()
                         if f1r > f2r:
                             f1 = next(g1)
                         elif f2r > f1r:
                             f2 = next(g2)
                         else:  # f1 and f2 point to files in the same linkrev
                             return f1 == f2  # true if they point to the same file
                 except StopIteration:
                     return False
             def graftcopies(wctx, ctx, base):
                 """reproduce copies between base and ctx in the wctx
                 Unlike mergecopies(), this function will only consider copies between base
                 and ctx; it will ignore copies between base and wctx. Also unlike
                 mergecopies(), this function will apply copies to the working copy (instead
                 of just returning information about the copies). That makes it cheaper
                 (especially in the common case of base==ctx.p1()) and useful also when
                 experimental.copytrace=off.
                 merge.update() will have already marked most copies, but it will only
                 mark copies if it thinks the source files are related (see
                 merge._related()). It will also not mark copies if the file wasn't modified
                 on the local side. This function adds the copies that were "missed"
                 by merge.update().
                 """
                 missed = pathcopies(base, ctx)
                 # drop the copies that are not valid between the working copy parent
                 # and the working copy itself
                 parent = wctx.p1()
                 _filter(parent, wctx, missed)
                 for dst, src in pycompat.iteritems(missed):
                     destination = wctx[dst]
                     destination.markcopied(src)