upstream/mercurial-mirror Commit - r46216:7a757e89

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import collections

10

import collections

11

import os

11

import os

12

13

from .i18n import _

13

from .i18n import _

14

15

16

from .revlogutils.flagutil import REVIDX_SIDEDATA

17

18

from . import (

16

from . import (

19

match as matchmod,

17

match as matchmod,

20

node,

18

node,

21

pathutil,

19

pathutil,

22

pycompat,

20

pycompat,

23

util,

21

util,

24

)

22

)

25

23

26

24

27

from .utils import stringutil

25

from .utils import stringutil

28

26

29

27

30

def _filter(src, dst, t):

28

def _filter(src, dst, t):

31

"""filters out invalid copies after chaining"""

29

"""filters out invalid copies after chaining"""

32

30

33

# When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')

31

# When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')

34

# with copies in 'b' (from 'mid' to 'dst'), we can get the different cases

32

# with copies in 'b' (from 'mid' to 'dst'), we can get the different cases

35

# in the following table (not including trivial cases). For example, case 2

33

# in the following table (not including trivial cases). For example, case 2

36

# is where a file existed in 'src' and remained under that name in 'mid' and

34

# is where a file existed in 'src' and remained under that name in 'mid' and

37

# then was renamed between 'mid' and 'dst'.

35

# then was renamed between 'mid' and 'dst'.

38

#

36

#

39

# case src mid dst result

37

# case src mid dst result

40

# 1 x y - -

38

# 1 x y - -

41

# 2 x y y x->y

39

# 2 x y y x->y

42

# 3 x y x -

40

# 3 x y x -

43

# 4 x y z x->z

41

# 4 x y z x->z

44

# 5 - x y -

42

# 5 - x y -

45

# 6 x x y x->y

43

# 6 x x y x->y

46

#

44

#

47

# _chain() takes care of chaining the copies in 'a' and 'b', but it

45

# _chain() takes care of chaining the copies in 'a' and 'b', but it

48

# cannot tell the difference between cases 1 and 2, between 3 and 4, or

46

# cannot tell the difference between cases 1 and 2, between 3 and 4, or

49

# between 5 and 6, so it includes all cases in its result.

47

# between 5 and 6, so it includes all cases in its result.

50

# Cases 1, 3, and 5 are then removed by _filter().

48

# Cases 1, 3, and 5 are then removed by _filter().

51

49

52

for k, v in list(t.items()):

50

for k, v in list(t.items()):

53

# remove copies from files that didn't exist

51

# remove copies from files that didn't exist

54

if v not in src:

52

if v not in src:

55

del t[k]

53

del t[k]

56

# remove criss-crossed copies

54

# remove criss-crossed copies

57

elif k in src and v in dst:

55

elif k in src and v in dst:

58

del t[k]

56

del t[k]

59

# remove copies to files that were then removed

57

# remove copies to files that were then removed

60

elif k not in dst:

58

elif k not in dst:

61

del t[k]

59

del t[k]

62

60

63

61

64

def _chain(prefix, suffix):

62

def _chain(prefix, suffix):

65

"""chain two sets of copies 'prefix' and 'suffix'"""

63

"""chain two sets of copies 'prefix' and 'suffix'"""

66

result = prefix.copy()

64

result = prefix.copy()

67

for key, value in pycompat.iteritems(suffix):

65

for key, value in pycompat.iteritems(suffix):

68

result[key] = prefix.get(value, value)

66

result[key] = prefix.get(value, value)

69

return result

67

return result

70

68

71

69

72

def _tracefile(fctx, am, basemf):

70

def _tracefile(fctx, am, basemf):

73

"""return file context that is the ancestor of fctx present in ancestor

71

"""return file context that is the ancestor of fctx present in ancestor

74

manifest am

72

manifest am

75

73

76

Note: we used to try and stop after a given limit, however checking if that

74

Note: we used to try and stop after a given limit, however checking if that

77

limit is reached turned out to be very expensive. we are better off

75

limit is reached turned out to be very expensive. we are better off

78

disabling that feature."""

76

disabling that feature."""

79

77

80

for f in fctx.ancestors():

78

for f in fctx.ancestors():

81

path = f.path()

79

path = f.path()

82

if am.get(path, None) == f.filenode():

80

if am.get(path, None) == f.filenode():

83

return path

81

return path

84

if basemf and basemf.get(path, None) == f.filenode():

82

if basemf and basemf.get(path, None) == f.filenode():

85

return path

83

return path

86

84

87

85

88

def _dirstatecopies(repo, match=None):

86

def _dirstatecopies(repo, match=None):

89

ds = repo.dirstate

87

ds = repo.dirstate

90

c = ds.copies().copy()

88

c = ds.copies().copy()

91

for k in list(c):

89

for k in list(c):

92

if ds[k] not in b'anm' or (match and not match(k)):

90

if ds[k] not in b'anm' or (match and not match(k)):

93

del c[k]

91

del c[k]

94

return c

92

return c

95

93

96

94

97

def _computeforwardmissing(a, b, match=None):

95

def _computeforwardmissing(a, b, match=None):

98

"""Computes which files are in b but not a.

96

"""Computes which files are in b but not a.

99

This is its own function so extensions can easily wrap this call to see what

97

This is its own function so extensions can easily wrap this call to see what

100

files _forwardcopies is about to process.

98

files _forwardcopies is about to process.

101

"""

99

"""

102

ma = a.manifest()

100

ma = a.manifest()

103

mb = b.manifest()

101

mb = b.manifest()

104

return mb.filesnotin(ma, match=match)

102

return mb.filesnotin(ma, match=match)

105

103

106

104

107

def usechangesetcentricalgo(repo):

105

def usechangesetcentricalgo(repo):

108

"""Checks if we should use changeset-centric copy algorithms"""

106

"""Checks if we should use changeset-centric copy algorithms"""

109

if repo.filecopiesmode == b'changeset-sidedata':

107

if repo.filecopiesmode == b'changeset-sidedata':

110

return True

108

return True

111

readfrom = repo.ui.config(b'experimental', b'copies.read-from')

109

readfrom = repo.ui.config(b'experimental', b'copies.read-from')

112

changesetsource = (b'changeset-only', b'compatibility')

110

changesetsource = (b'changeset-only', b'compatibility')

113

return readfrom in changesetsource

111

return readfrom in changesetsource

114

112

115

113

116

def _committedforwardcopies(a, b, base, match):

114

def _committedforwardcopies(a, b, base, match):

117

"""Like _forwardcopies(), but b.rev() cannot be None (working copy)"""

115

"""Like _forwardcopies(), but b.rev() cannot be None (working copy)"""

118

# files might have to be traced back to the fctx parent of the last

116

# files might have to be traced back to the fctx parent of the last

119

# one-side-only changeset, but not further back than that

117

# one-side-only changeset, but not further back than that

120

repo = a._repo

118

repo = a._repo

121

119

122

if usechangesetcentricalgo(repo):

120

if usechangesetcentricalgo(repo):

123

return _changesetforwardcopies(a, b, match)

121

return _changesetforwardcopies(a, b, match)

124

122

125

debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

123

debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

126

dbg = repo.ui.debug

124

dbg = repo.ui.debug

127

if debug:

125

if debug:

128

dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))

126

dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))

129

am = a.manifest()

127

am = a.manifest()

130

basemf = None if base is None else base.manifest()

128

basemf = None if base is None else base.manifest()

131

129

132

# find where new files came from

130

# find where new files came from

133

# we currently don't try to find where old files went, too expensive

131

# we currently don't try to find where old files went, too expensive

134

# this means we can miss a case like 'hg rm b; hg cp a b'

132

# this means we can miss a case like 'hg rm b; hg cp a b'

135

cm = {}

133

cm = {}

136

134

137

# Computing the forward missing is quite expensive on large manifests, since

135

# Computing the forward missing is quite expensive on large manifests, since

138

# it compares the entire manifests. We can optimize it in the common use

136

# it compares the entire manifests. We can optimize it in the common use

139

# case of computing what copies are in a commit versus its parent (like

137

# case of computing what copies are in a commit versus its parent (like

140

# during a rebase or histedit). Note, we exclude merge commits from this

138

# during a rebase or histedit). Note, we exclude merge commits from this

141

# optimization, since the ctx.files() for a merge commit is not correct for

139

# optimization, since the ctx.files() for a merge commit is not correct for

142

# this comparison.

140

# this comparison.

143

forwardmissingmatch = match

141

forwardmissingmatch = match

144

if b.p1() == a and b.p2().node() == node.nullid:

142

if b.p1() == a and b.p2().node() == node.nullid:

145

filesmatcher = matchmod.exact(b.files())

143

filesmatcher = matchmod.exact(b.files())

146

forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)

144

forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)

147

missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

145

missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

148

146

149

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

147

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

150

148

151

if debug:

149

if debug:

152

dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

150

dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

153

151

154

for f in sorted(missing):

152

for f in sorted(missing):

155

if debug:

153

if debug:

156

dbg(b'debug.copies: tracing file: %s\n' % f)

154

dbg(b'debug.copies: tracing file: %s\n' % f)

157

fctx = b[f]

155

fctx = b[f]

158

fctx._ancestrycontext = ancestrycontext

156

fctx._ancestrycontext = ancestrycontext

159

157

160

if debug:

158

if debug:

161

start = util.timer()

159

start = util.timer()

162

opath = _tracefile(fctx, am, basemf)

160

opath = _tracefile(fctx, am, basemf)

163

if opath:

161

if opath:

164

if debug:

162

if debug:

165

dbg(b'debug.copies: rename of: %s\n' % opath)

163

dbg(b'debug.copies: rename of: %s\n' % opath)

166

cm[f] = opath

164

cm[f] = opath

167

if debug:

165

if debug:

168

dbg(

166

dbg(

169

b'debug.copies: time: %f seconds\n'

167

b'debug.copies: time: %f seconds\n'

170

% (util.timer() - start)

168

% (util.timer() - start)

171

)

169

)

172

return cm

170

return cm

173

171

174

172

175

def _revinfo_getter(repo):

173

def _revinfo_getter(repo):

176

"""return a function that returns multiple data given a <rev>

174

"""return a function that returns multiple data given a <rev>

177

175

178

* p1: revision number of first parent

176

* p1: revision number of first parent

179

* p2: revision number of second parent

177

* p2: revision number of second parent

180

* p1copies: mapping of copies from p1

178

* p1copies: mapping of copies from p1

181

* p2copies: mapping of copies from p2

179

* p2copies: mapping of copies from p2

182

* removed: a list of removed files

180

* removed: a list of removed files

183

* ismerged: a callback to know if file was merged in that revision

181

* ismerged: a callback to know if file was merged in that revision

184

"""

182

"""

185

cl = repo.changelog

183

cl = repo.changelog

186

parents = cl.parentrevs

184

parents = cl.parentrevs

187

185

188

def get_ismerged(rev):

186

def get_ismerged(rev):

189

ctx = repo[rev]

187

ctx = repo[rev]

190

188

191

def ismerged(path):

189

def ismerged(path):

192

if path not in ctx.files():

190

if path not in ctx.files():

193

return False

191

return False

194

fctx = ctx[path]

192

fctx = ctx[path]

195

parents = fctx._filelog.parents(fctx._filenode)

193

parents = fctx._filelog.parents(fctx._filenode)

196

nb_parents = 0

194

nb_parents = 0

197

for n in parents:

195

for n in parents:

198

if n != node.nullid:

196

if n != node.nullid:

199

nb_parents += 1

197

nb_parents += 1

200

return nb_parents >= 2

198

return nb_parents >= 2

201

199

202

return ismerged

200

return ismerged

203

201

204

changelogrevision = cl.changelogrevision

202

changelogrevision = cl.changelogrevision

205

flags = cl.flags

206

203

207

# A small cache to avoid doing the work twice for merges

204

# A small cache to avoid doing the work twice for merges

208

#

205

#

209

# In the vast majority of cases, if we ask information for a revision

206

# In the vast majority of cases, if we ask information for a revision

210

# about 1 parent, we'll later ask it for the other. So it makes sense to

207

# about 1 parent, we'll later ask it for the other. So it makes sense to

211

# keep the information around when reaching the first parent of a merge

208

# keep the information around when reaching the first parent of a merge

212

# and dropping it after it was provided for the second parent.

209

# and dropping it after it was provided for the second parent.

213

#

210

#

214

# There are cases where only one parent of the merge will be walked. It

211

# There are cases where only one parent of the merge will be walked. It

215

# happens when the "destination" of the copy tracing is descended from a

212

# happens when the "destination" of the copy tracing is descended from a

216

# new root, not common with the "source". In that case, we will only walk

213

# new root, not common with the "source". In that case, we will only walk

217

# through merge parents that are descendant of changesets common

214

# through merge parents that are descendant of changesets common

218

# between "source" and "destination".

215

# between "source" and "destination".

219

#

216

#

220

# With the current implementation, if such changesets have copy

217

# With the current implementation, if such changesets have copy

221

# information, we'll keep them in memory until the end of

218

# information, we'll keep them in memory until the end of

222

# _changesetforwardcopies. We don't expect the case to be frequent

219

# _changesetforwardcopies. We don't expect the case to be frequent

223

# enough to matter.

220

# enough to matter.

224

#

221

#

225

# In addition, it would be possible to reach a pathological case, where

222

# In addition, it would be possible to reach a pathological case, where

226

# many first parents are met before any second parent is reached. In

223

# many first parents are met before any second parent is reached. In

227

# that case the cache could grow. If this ever becomes an issue one can

224

# that case the cache could grow. If this ever becomes an issue one can

228

# safely introduce a maximum cache size. This would trade extra CPU/IO

225

# safely introduce a maximum cache size. This would trade extra CPU/IO

229

# time to save memory.

226

# time to save memory.

230

merge_caches = {}

227

merge_caches = {}

231

228

232

def revinfo(rev):

229

def revinfo(rev):

233

p1, p2 = parents(rev)

230

p1, p2 = parents(rev)

234

value = None

231

value = None

235

if flags(rev) & REVIDX_SIDEDATA:

232

e = merge_caches.pop(rev, None)

236

e = merge_caches.pop(rev, None)

233

if e is not None:

237

if e is not None:

234

return e

238

return e

235

c = changelogrevision(rev)

239

c = changelogrevision(rev)

236

p1copies = c.p1copies

240

p1copies = c.p1copies

237

p2copies = c.p2copies

241

p2copies = c.p2copies

238

removed = c.filesremoved

242

removed = c.filesremoved

239

if p1 != node.nullrev and p2 != node.nullrev:

243

if p1 != node.nullrev and p2 != node.nullrev:

240

# XXX in some cases we over-cache, IGNORE

244

# XXX in some cases we over-cache, IGNORE

241

value = merge_caches[rev] = (

245

value = merge_caches[rev] = (

242

p1,

246

p1,

243

p2,

247

p2,

244

p1copies,

248

p1copies,

245

p2copies,

249

p2copies,

246

removed,

250

removed,

247

get_ismerged(rev),

251

get_ismerged(rev),

248

)

252

)

253

else:

254

p1copies = {}

255

p2copies = {}

256

removed = []

257

249

258

if value is None:

250

if value is None:

259

value = (p1, p2, p1copies, p2copies, removed, get_ismerged(rev))

251

value = (p1, p2, p1copies, p2copies, removed, get_ismerged(rev))

260

return value

252

return value

261

253

262

return revinfo

254

return revinfo

263

255

264

256

265

def _changesetforwardcopies(a, b, match):

257

def _changesetforwardcopies(a, b, match):

266

if a.rev() in (node.nullrev, b.rev()):

258

if a.rev() in (node.nullrev, b.rev()):

267

return {}

259

return {}

268

260

269

repo = a.repo().unfiltered()

261

repo = a.repo().unfiltered()

270

children = {}

262

children = {}

271

263

272

cl = repo.changelog

264

cl = repo.changelog

273

isancestor = cl.isancestorrev # XXX we should add caching to this.

265

isancestor = cl.isancestorrev # XXX we should add caching to this.

274

missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])

266

missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])

275

mrset = set(missingrevs)

267

mrset = set(missingrevs)

276

roots = set()

268

roots = set()

277

for r in missingrevs:

269

for r in missingrevs:

278

for p in cl.parentrevs(r):

270

for p in cl.parentrevs(r):

279

if p == node.nullrev:

271

if p == node.nullrev:

280

continue

272

continue

281

if p not in children:

273

if p not in children:

282

children[p] = [r]

274

children[p] = [r]

283

else:

275

else:

284

children[p].append(r)

276

children[p].append(r)

285

if p not in mrset:

277

if p not in mrset:

286

roots.add(p)

278

roots.add(p)

287

if not roots:

279

if not roots:

288

# no common revision to track copies from

280

# no common revision to track copies from

289

return {}

281

return {}

290

min_root = min(roots)

282

min_root = min(roots)

291

283

292

from_head = set(

284

from_head = set(

293

cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)

285

cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)

294

)

286

)

295

287

296

iterrevs = set(from_head)

288

iterrevs = set(from_head)

297

iterrevs &= mrset

289

iterrevs &= mrset

298

iterrevs.update(roots)

290

iterrevs.update(roots)

299

iterrevs.remove(b.rev())

291

iterrevs.remove(b.rev())

300

revs = sorted(iterrevs)

292

revs = sorted(iterrevs)

301

293

302

if repo.filecopiesmode == b'changeset-sidedata':

294

if repo.filecopiesmode == b'changeset-sidedata':

303

revinfo = _revinfo_getter(repo)

295

revinfo = _revinfo_getter(repo)

304

return _combine_changeset_copies(

296

return _combine_changeset_copies(

305

revs, children, b.rev(), revinfo, match, isancestor

297

revs, children, b.rev(), revinfo, match, isancestor

306

)

298

)

307

else:

299

else:

308

revinfo = _revinfo_getter_extra(repo)

300

revinfo = _revinfo_getter_extra(repo)

309

return _combine_changeset_copies_extra(

301

return _combine_changeset_copies_extra(

310

revs, children, b.rev(), revinfo, match, isancestor

302

revs, children, b.rev(), revinfo, match, isancestor

311

)

303

)

312

304

313

305

314

def _combine_changeset_copies(

306

def _combine_changeset_copies(

315

revs, children, targetrev, revinfo, match, isancestor

307

revs, children, targetrev, revinfo, match, isancestor

316

):

308

):

317

"""combine the copies information for each item of iterrevs

309

"""combine the copies information for each item of iterrevs

318

310

319

revs: sorted iterable of revision to visit

311

revs: sorted iterable of revision to visit

320

children: a {parent: [children]} mapping.

312

children: a {parent: [children]} mapping.

321

targetrev: the final copies destination revision (not in iterrevs)

313

targetrev: the final copies destination revision (not in iterrevs)

322

revinfo(rev): a function that returns (p1, p2, p1copies, p2copies, removed, ismerged)

314

revinfo(rev): a function that returns (p1, p2, p1copies, p2copies, removed, ismerged)

323

match: a matcher

315

match: a matcher

324

316

325

It returns the aggregated copies information for `targetrev`.

317

It returns the aggregated copies information for `targetrev`.

326

"""

318

"""

327

all_copies = {}

319

all_copies = {}

328

alwaysmatch = match.always()

320

alwaysmatch = match.always()

329

for r in revs:

321

for r in revs:

330

copies = all_copies.pop(r, None)

322

copies = all_copies.pop(r, None)

331

if copies is None:

323

if copies is None:

332

# this is a root

324

# this is a root

333

copies = {}

325

copies = {}

334

for i, c in enumerate(children[r]):

326

for i, c in enumerate(children[r]):

335

p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)

327

p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)

336

if r == p1:

328

if r == p1:

337

parent = 1

329

parent = 1

338

childcopies = p1copies

330

childcopies = p1copies

339

else:

331

else:

340

assert r == p2

332

assert r == p2

341

parent = 2

333

parent = 2

342

childcopies = p2copies

334

childcopies = p2copies

343

if not alwaysmatch:

335

if not alwaysmatch:

344

childcopies = {

336

childcopies = {

345

dst: src for dst, src in childcopies.items() if match(dst)

337

dst: src for dst, src in childcopies.items() if match(dst)

346

}

338

}

347

newcopies = copies

339

newcopies = copies

348

if childcopies:

340

if childcopies:

349

newcopies = copies.copy()

341

newcopies = copies.copy()

350

for dest, source in pycompat.iteritems(childcopies):

342

for dest, source in pycompat.iteritems(childcopies):

351

prev = copies.get(source)

343

prev = copies.get(source)

352

if prev is not None and prev[1] is not None:

344

if prev is not None and prev[1] is not None:

353

source = prev[1]

345

source = prev[1]

354

newcopies[dest] = (c, source)

346

newcopies[dest] = (c, source)

355

assert newcopies is not copies

347

assert newcopies is not copies

356

for f in removed:

348

for f in removed:

357

if f in newcopies:

349

if f in newcopies:

358

if newcopies is copies:

350

if newcopies is copies:

359

# copy on write to avoid affecting potential other

351

# copy on write to avoid affecting potential other

360

# branches. when there are no other branches, this

352

# branches. when there are no other branches, this

361

# could be avoided.

353

# could be avoided.

362

newcopies = copies.copy()

354

newcopies = copies.copy()

363

newcopies[f] = (c, None)

355

newcopies[f] = (c, None)

364

othercopies = all_copies.get(c)

356

othercopies = all_copies.get(c)

365

if othercopies is None:

357

if othercopies is None:

366

all_copies[c] = newcopies

358

all_copies[c] = newcopies

367

else:

359

else:

368

# we are the second parent to work on c, we need to merge our

360

# we are the second parent to work on c, we need to merge our

369

# work with the other.

361

# work with the other.

370

#

362

#

371

# In case of conflict, parent 1 takes precedence over parent 2.

363

# In case of conflict, parent 1 takes precedence over parent 2.

372

# This is an arbitrary choice made anew when implementing

364

# This is an arbitrary choice made anew when implementing

373

# changeset based copies. It was made without regard to

365

# changeset based copies. It was made without regard to

374

# potential filelog related behavior.

366

# potential filelog related behavior.

375

if parent == 1:

367

if parent == 1:

376

_merge_copies_dict(

368

_merge_copies_dict(

377

othercopies, newcopies, isancestor, ismerged

369

othercopies, newcopies, isancestor, ismerged

378

)

370

)

379

else:

371

else:

380

_merge_copies_dict(

372

_merge_copies_dict(

381

newcopies, othercopies, isancestor, ismerged

373

newcopies, othercopies, isancestor, ismerged

382

)

374

)

383

all_copies[c] = newcopies

375

all_copies[c] = newcopies

384

376

385

final_copies = {}

377

final_copies = {}

386

for dest, (tt, source) in all_copies[targetrev].items():

378

for dest, (tt, source) in all_copies[targetrev].items():

387

if source is not None:

379

if source is not None:

388

final_copies[dest] = source

380

final_copies[dest] = source

389

return final_copies

381

return final_copies

390

382

391

383

392

def _merge_copies_dict(minor, major, isancestor, ismerged):

384

def _merge_copies_dict(minor, major, isancestor, ismerged):

393

"""merge two copies-mapping together, minor and major

385

"""merge two copies-mapping together, minor and major

394

386

395

In case of conflict, value from "major" will be picked.

387

In case of conflict, value from "major" will be picked.

396

388

397

- `isancestor(low_rev, high_rev)`: callable returning True if `low_rev` is an

389

- `isancestor(low_rev, high_rev)`: callable returning True if `low_rev` is an

398

ancestor of `high_rev`,

390

ancestor of `high_rev`,

399

391

400

- `ismerged(path)`: callable returning True if `path` has been merged in the

392

- `ismerged(path)`: callable returning True if `path` has been merged in the

401

current revision,

393

current revision,

402

"""

394

"""

403

for dest, value in major.items():

395

for dest, value in major.items():

404

other = minor.get(dest)

396

other = minor.get(dest)

405

if other is None:

397

if other is None:

406

minor[dest] = value

398

minor[dest] = value

407

else:

399

else:

408

new_tt = value[0]

400

new_tt = value[0]

409

other_tt = other[0]

401

other_tt = other[0]

410

if value[1] == other[1]:

402

if value[1] == other[1]:

411

continue

403

continue

412

# content from "major" wins, unless it is older

404

# content from "major" wins, unless it is older

413

# than the branch point or there is a merge

405

# than the branch point or there is a merge

414

if (

406

if (

415

new_tt == other_tt

407

new_tt == other_tt

416

or not isancestor(new_tt, other_tt)

408

or not isancestor(new_tt, other_tt)

417

or ismerged(dest)

409

or ismerged(dest)

418

):

410

):

419

minor[dest] = value

411

minor[dest] = value

420

412

421

413

422

def _revinfo_getter_extra(repo):

414

def _revinfo_getter_extra(repo):

423

"""return a function that returns multiple data given a <rev>

415

"""return a function that returns multiple data given a <rev>

424

416

425

* p1: revision number of first parent

417

* p1: revision number of first parent

426

* p2: revision number of second parent

418

* p2: revision number of second parent

427

* p1copies: mapping of copies from p1

419

* p1copies: mapping of copies from p1

428

* p2copies: mapping of copies from p2

420

* p2copies: mapping of copies from p2

429

* removed: a list of removed files

421

* removed: a list of removed files

430

* ismerged: a callback to know if file was merged in that revision

422

* ismerged: a callback to know if file was merged in that revision

431

"""

423

"""

432

cl = repo.changelog

424

cl = repo.changelog

433

parents = cl.parentrevs

425

parents = cl.parentrevs

434

426

435

def get_ismerged(rev):

427

def get_ismerged(rev):

436

ctx = repo[rev]

428

ctx = repo[rev]

437

429

438

def ismerged(path):

430

def ismerged(path):

439

if path not in ctx.files():

431

if path not in ctx.files():

440

return False

432

return False

441

fctx = ctx[path]

433

fctx = ctx[path]

442

parents = fctx._filelog.parents(fctx._filenode)

434

parents = fctx._filelog.parents(fctx._filenode)

443

nb_parents = 0

435

nb_parents = 0

444

for n in parents:

436

for n in parents:

445

if n != node.nullid:

437

if n != node.nullid:

446

nb_parents += 1

438

nb_parents += 1

447

return nb_parents >= 2

439

return nb_parents >= 2

448

440

449

return ismerged

441

return ismerged

450

442

451

def revinfo(rev):

443

def revinfo(rev):

452

p1, p2 = parents(rev)

444

p1, p2 = parents(rev)

453

ctx = repo[rev]

445

ctx = repo[rev]

454

p1copies, p2copies = ctx._copies

446

p1copies, p2copies = ctx._copies

455

removed = ctx.filesremoved()

447

removed = ctx.filesremoved()

456

return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

448

return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

457

449

458

return revinfo

450

return revinfo

459

451

460

452

461

def _combine_changeset_copies_extra(

453

def _combine_changeset_copies_extra(

462

revs, children, targetrev, revinfo, match, isancestor

454

revs, children, targetrev, revinfo, match, isancestor

463

):

455

):

464

"""version of `_combine_changeset_copies` that works with the Google

456

"""version of `_combine_changeset_copies` that works with the Google

465

specific "extra" based storage for copy information"""

457

specific "extra" based storage for copy information"""

466

all_copies = {}

458

all_copies = {}

467

alwaysmatch = match.always()

459

alwaysmatch = match.always()

468

for r in revs:

460

for r in revs:

469

copies = all_copies.pop(r, None)

461

copies = all_copies.pop(r, None)

470

if copies is None:

462

if copies is None:

471

# this is a root

463

# this is a root

472

copies = {}

464

copies = {}

473

for i, c in enumerate(children[r]):

465

for i, c in enumerate(children[r]):

474

p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)

466

p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)

475

if r == p1:

467

if r == p1:

476

parent = 1

468

parent = 1

477

childcopies = p1copies

469

childcopies = p1copies

478

else:

470

else:

479

assert r == p2

471

assert r == p2

480

parent = 2

472

parent = 2

481

childcopies = p2copies

473

childcopies = p2copies

482

if not alwaysmatch:

474

if not alwaysmatch:

483

childcopies = {

475

childcopies = {

484

dst: src for dst, src in childcopies.items() if match(dst)

476

dst: src for dst, src in childcopies.items() if match(dst)

485

}

477

}

486

newcopies = copies

478

newcopies = copies

487

if childcopies:

479

if childcopies:

488

newcopies = copies.copy()

480

newcopies = copies.copy()

489

for dest, source in pycompat.iteritems(childcopies):

481

for dest, source in pycompat.iteritems(childcopies):

490

prev = copies.get(source)

482

prev = copies.get(source)

491

if prev is not None and prev[1] is not None:

483

if prev is not None and prev[1] is not None:

492

source = prev[1]

484

source = prev[1]

493

newcopies[dest] = (c, source)

485

newcopies[dest] = (c, source)

494

assert newcopies is not copies

486

assert newcopies is not copies

495

for f in removed:

487

for f in removed:

496

if f in newcopies:

488

if f in newcopies:

497

if newcopies is copies:

489

if newcopies is copies:

498

# copy on write to avoid affecting potential other

490

# copy on write to avoid affecting potential other

499

# branches. when there are no other branches, this

491

# branches. when there are no other branches, this

500

# could be avoided.

492

# could be avoided.

501

newcopies = copies.copy()

493

newcopies = copies.copy()

502

newcopies[f] = (c, None)

494

newcopies[f] = (c, None)

503

othercopies = all_copies.get(c)

495

othercopies = all_copies.get(c)

504

if othercopies is None:

496

if othercopies is None:

505

all_copies[c] = newcopies

497

all_copies[c] = newcopies

506

else:

498

else:

507

# we are the second parent to work on c, we need to merge our

499

# we are the second parent to work on c, we need to merge our

508

# work with the other.

500

# work with the other.

509

#

501

#

510

# In case of conflict, parent 1 takes precedence over parent 2.

502

# In case of conflict, parent 1 takes precedence over parent 2.

511

# This is an arbitrary choice made anew when implementing

503

# This is an arbitrary choice made anew when implementing

512

# changeset based copies. It was made without regard to

504

# changeset based copies. It was made without regard to

513

# potential filelog related behavior.

505

# potential filelog related behavior.

514

if parent == 1:

506

if parent == 1:

515

_merge_copies_dict_extra(

507

_merge_copies_dict_extra(

516

othercopies, newcopies, isancestor, ismerged

508

othercopies, newcopies, isancestor, ismerged

517

)

509

)

518

else:

510

else:

519

_merge_copies_dict_extra(

511

_merge_copies_dict_extra(

520

newcopies, othercopies, isancestor, ismerged

512

newcopies, othercopies, isancestor, ismerged

521

)

513

)

522

all_copies[c] = newcopies

514

all_copies[c] = newcopies

523

515

524

final_copies = {}

516

final_copies = {}

525

for dest, (tt, source) in all_copies[targetrev].items():

517

for dest, (tt, source) in all_copies[targetrev].items():

526

if source is not None:

518

if source is not None:

527

final_copies[dest] = source

519

final_copies[dest] = source

528

return final_copies

520

return final_copies

529

521

530

522

531

def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
    """Merge the copy information of ``major`` into ``minor``, in place.

    This is the variant of ``_merge_copies_dict`` that works with the Google
    specific "extra" based storage for copy information.

    Both arguments map a destination name to a ``(tt, source)`` pair. The
    ``tt`` element is what gets handed to ``isancestor``; the ``source``
    element is the copy source (or ``None``).

    minor: mapping that is updated in place and holds the merged result
    major: mapping whose entries take precedence on conflict
    isancestor: callable ``(new_tt, other_tt) -> bool``
    ismerged: callable ``(dest) -> bool``

    Nothing is returned; callers read the result from ``minor``.
    """
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            # only "major" knows about this destination
            minor[dest] = value
            continue

        new_tt = value[0]
        other_tt = other[0]
        if value[1] == other[1]:
            # both sides agree on the source, keep the existing entry
            continue

        # content from "major" wins, unless it is older
        # than the branch point or there is a merge
        if (
            new_tt == other_tt
            or not isancestor(new_tt, other_tt)
            or ismerged(dest)
        ):
            minor[dest] = value

551

543

552

544

553

def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b

    ``base`` defaults to ``a`` when not given. ``match`` is always passed
    through the repository's ``narrowmatch()`` before being used.
    """

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        # only committed history is traced; start from the first parent
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies

567

559

568

560

569

def _backwardrenames(a, b, match):
    """find {src@b: dst@a} rename mapping by reversing forward copies b -> a

    Returns an empty dict when ``experimental.copytrace`` is ``off``.
    Entries whose source still exists in ``a`` are plain copies, not
    renames, and are dropped.
    """
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    # sorted so that the pick among 1:n renames is deterministic
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r

589

581

590

582

591

def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    Returns an empty dict when ``x == y`` or when either context is falsy.
    Otherwise one of four search modes is used, depending on how ``x`` and
    ``y`` relate: dirstate (``y`` is the working copy on top of ``x``),
    forward (``x`` is the ancestor), backward (``y`` is the ancestor) or a
    combination of backward and forward through the common ancestor.
    """
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        # walk back from x to the ancestor, then forward to y
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies

627

619

628

620

629

def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o        ---> 4 another commit
    |
    |   o    ---> 3 commit that modifies a.txt
    |  /
    o /      ---> 2 commit that moves a.txt to b.txt
    |/
    o        ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the
    following message:

    ```other changed <file> which local deleted```

    Returns a 3-tuple ``(branch_copies1, branch_copies2, diverge)`` where:

    "branch_copies1" and "branch_copies2" are instances of branch_copies,
    one for each side of the merge.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)

697

689

698

690

699

def _isfullcopytraceable(repo, c1, base):
    """Tell whether the full copytrace algorithm is cheap enough to use.

    Returns True only when both ``c1`` and ``base`` are mutable and the
    number of changesets in ``base::c1`` is below the limit. In that case
    the full copytrace algorithm can be used for increased capabilities
    since it will be fast enough. When ``c1`` is the working copy
    (``c1.rev()`` is None) its first parent is checked instead.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint(
            b'experimental', b'copytrace.sourcecommitlimit'
        )
        commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False

717

709

718

710

719

def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    """Classify a source that was copied/renamed on one side only.

    src: the copy source name
    dsts1: destinations of ``src`` on the side that made the copies
    m1: manifest of the side that made the copies
    m2: manifest of the other side
    mb: manifest of the merge base
    c2: context of the other side
    base: context of the merge base
    copy: dict updated in place with ``dst -> src`` for copies to merge
    renamedelete: dict updated in place with ``src -> dsts1`` when the file
        was renamed on this side and deleted on the other

    Nothing is returned; results are recorded in ``copy`` and
    ``renamedelete``.
    """
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif src not in mb:
        # Work around the "short-circuit to avoid issues with merge states"
        # thing in pathcopies(): pathcopies(x, y) can return a copy where the
        # destination doesn't exist in y.
        pass
    elif mb[src] != m2[src] and not _related(c2[src], base[src]):
        # changed on side 2 into a file unrelated to the one in base
        return
    elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
        # modified on side 2
        for dst in dsts1:
            copy[dst] = src

738

730

739

731

740

class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # None defaults are replaced by a fresh dict per instance, so no
        # mutable state is shared between instances.
        self.copy = {} if copy is None else copy
        self.renamedelete = {} if renamedelete is None else renamedelete
        self.dirmove = {} if dirmove is None else dirmove
        self.movewithdir = {} if movewithdir is None else movewithdir

    def __repr__(self):
        return (
            '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>'
            % (self.copy, self.renamedelete, self.dirmove, self.movewithdir,)
        )

772

764

773

765

774

def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 3-tuple ``(branch_copies1, branch_copies2, diverge)``: one
    branch_copies instance per side, plus a mapping of source name -> sorted
    list of destination names for divergent renames.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return branch_copies(), branch_copies(), {}

    # invert {dst: src} into {src: [dst, ...]} for each side
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    if repo.ui.debugflag:
        # these sets are only needed to annotate the debug output below
        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b" on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    # files added on one side are checked against dir moves of the other side
    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge

897

889

898

890

899

def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
              merge.manifestmerge() won't care about
    addedfiles: added files on the other side (compared to ctx)

    Returns a ``(dirmove, movewithdir)`` pair. ``dirmove`` maps a source
    directory to a destination directory, both with a trailing ``/``.
    ``movewithdir`` maps an added file to the path it should be moved to.
    Both are empty dicts when no directory move is detected.
    """
    # generate a directory move map
    d = ctx.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d and ddst in d:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    # a directory may have been recorded before it was found to be invalid
    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d, invalid

    if not dirmove:
        return {}, {}

    # the trailing slash makes the prefix test below match whole directories
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfiles:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b" pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return dirmove, movewithdir

962

954

963

955

964

def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
                    (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns a 3-tuple: branch_copies for the c1 side, branch_copies for the
    c2 side, and an empty dict (or whatever _fullcopytracing() returns when
    falling back to it).
    """

    # a context without a revision number is replaced by its first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # walk first parents from c2 down to base, collecting touched files
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # copies recorded between base and c2 whose source still exists in c1
    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = [
        f for f in changedfiles if f not in m1 and f in base and f in c2
    ]

    copies1 = {}
    if missingfiles:
        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (the value does not depend on the file, so read it only once)
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        # index files that c1 has but base does not, by basename and dirname
        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}

1071

1063

1072

1064

1073

def _related(f1, f2):

1065

def _related(f1, f2):

1074

"""return True if f1 and f2 filectx have a common ancestor

1066

"""return True if f1 and f2 filectx have a common ancestor

1075

1067

1076

Walk back to common ancestor to see if the two files originate

1068

Walk back to common ancestor to see if the two files originate

1077

from the same file. Since workingfilectx's rev() is None it messes

1069

from the same file. Since workingfilectx's rev() is None it messes

1078

up the integer comparison logic, hence the pre-step check for

1070

up the integer comparison logic, hence the pre-step check for

1079

None (f1 and f2 can only be workingfilectx's initially).

1071

None (f1 and f2 can only be workingfilectx's initially).

1080

"""

1072

"""

1081

1073

1082

if f1 == f2:

1074

if f1 == f2:

1083

return True # a match

1075

return True # a match

1084

1076

1085

g1, g2 = f1.ancestors(), f2.ancestors()

1077

g1, g2 = f1.ancestors(), f2.ancestors()

1086

try:

1078

try:

1087

f1r, f2r = f1.linkrev(), f2.linkrev()

1079

f1r, f2r = f1.linkrev(), f2.linkrev()

1088

1080

1089

if f1r is None:

1081

if f1r is None:

1090

f1 = next(g1)

1082

f1 = next(g1)

1091

if f2r is None:

1083

if f2r is None:

1092

f2 = next(g2)

1084

f2 = next(g2)

1093

1085

1094

while True:

1086

while True:

1095

f1r, f2r = f1.linkrev(), f2.linkrev()

1087

f1r, f2r = f1.linkrev(), f2.linkrev()

1096

if f1r > f2r:

1088

if f1r > f2r:

1097

f1 = next(g1)

1089

f1 = next(g1)

1098

elif f2r > f1r:

1090

elif f2r > f1r:

1099

f2 = next(g2)

1091

f2 = next(g2)

1100

else: # f1 and f2 point to files in the same linkrev

1092

else: # f1 and f2 point to files in the same linkrev

1101

return f1 == f2 # true if they point to the same file

1093

return f1 == f2 # true if they point to the same file

1102

except StopIteration:

1094

except StopIteration:

1103

return False

1095

return False

1104

1096

1105

1097

1106

def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    grafted = pathcopies(base, ctx)
    # prune, in place, the entries _filter() rejects for wctx.p1() -> wctx
    _filter(wctx.p1(), wctx, grafted)
    for dest, source in pycompat.iteritems(grafted):
        wctx[dest].markcopied(source)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import collections
             import os
             from .i18n import _
-            from .revlogutils.flagutil import REVIDX_SIDEDATA
             from . import (
                 match as matchmod,
                 node,
                 pathutil,
                 pycompat,
                 util,
             )
             from .utils import stringutil
             def _filter(src, dst, t):
                 """filters out invalid copies after chaining"""
                 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
                 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
                 # in the following table (not including trivial cases). For example, case 2
                 # is where a file existed in 'src' and remained under that name in 'mid' and
                 # then was renamed between 'mid' and 'dst'.
                 #
                 # case src mid dst result
                 #   1   x   y   -    -
                 #   2   x   y   y   x->y
                 #   3   x   y   x    -
                 #   4   x   y   z   x->z
                 #   5   -   x   y    -
                 #   6   x   x   y   x->y
                 #
                 # _chain() takes care of chaining the copies in 'a' and 'b', but it
                 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
                 # between 5 and 6, so it includes all cases in its result.
                 # Cases 1, 3, and 5 are then removed by _filter().
                 for k, v in list(t.items()):
                     # remove copies from files that didn't exist
                     if v not in src:
                         del t[k]
                     # remove criss-crossed copies
                     elif k in src and v in dst:
                         del t[k]
                     # remove copies to files that were then removed
                     elif k not in dst:
                         del t[k]
             def _chain(prefix, suffix):
                 """chain two sets of copies 'prefix' and 'suffix'

                 Returns a new mapping: a copy of 'prefix' extended with every
                 destination of 'suffix', whose source is looked up through 'prefix'
                 (and kept as is when 'prefix' has no entry for it).
                 """
                 chained = prefix.copy()
                 chained.update(
                     (dest, prefix.get(source, source))
                     for dest, source in pycompat.iteritems(suffix)
                 )
                 return chained
             def _tracefile(fctx, am, basemf):
                 """return file context that is the ancestor of fctx present in ancestor
                 manifest am
                 Note: we used to try and stop after a given limit, however checking if that
                 limit is reached turned out to be very expensive. we are better off
                 disabling that feature."""
                 for f in fctx.ancestors():
                     path = f.path()
                     if am.get(path, None) == f.filenode():
                         return path
                     if basemf and basemf.get(path, None) == f.filenode():
                         return path
             def _dirstatecopies(repo, match=None):
                 ds = repo.dirstate
                 c = ds.copies().copy()
                 for k in list(c):
                     if ds[k] not in b'anm' or (match and not match(k)):
                         del c[k]
                 return c
             def _computeforwardmissing(a, b, match=None):
                 """Computes which files are in b but not a.
                 This is its own function so extensions can easily wrap this call to see what
                 files _forwardcopies is about to process.
                 """
                 ma = a.manifest()
                 mb = b.manifest()
                 return mb.filesnotin(ma, match=match)
             def usechangesetcentricalgo(repo):
                 """Checks if we should use changeset-centric copy algorithms

                 True when the repository's filecopiesmode is b'changeset-sidedata', or
                 when the `experimental.copies.read-from` config is b'changeset-only'
                 or b'compatibility'.
                 """
                 return repo.filecopiesmode == b'changeset-sidedata' or repo.ui.config(
                     b'experimental', b'copies.read-from'
                 ) in (b'changeset-only', b'compatibility')
             def _committedforwardcopies(a, b, base, match):
                 """Like _forwardcopies(), but b.rev() cannot be None (working copy)

                 Returns a {path in b: traced origin path} dict for the files of b that
                 are missing from a and for which _tracefile() finds an origin. The
                 whole job is delegated to _changesetforwardcopies() when the
                 changeset-centric algorithm is in use.
                 """
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 repo = a._repo
                 if usechangesetcentricalgo(repo):
                     return _changesetforwardcopies(a, b, match)

                 ui = repo.ui
                 debug = ui.debugflag and ui.configbool(b'devel', b'debug.copies')
                 dbg = ui.debug
                 if debug:
                     dbg(b'debug.copies:    looking into rename from %s to %s\n' % (a, b))

                 am = a.manifest()
                 if base is None:
                     basemf = None
                 else:
                     basemf = base.manifest()

                 # find where new files came from
                 # we currently don't try to find where old files went, too expensive
                 # this means we can miss a case like 'hg rm b; hg cp a b'
                 found = {}

                 # Computing the forward missing is quite expensive on large manifests, since
                 # it compares the entire manifests. We can optimize it in the common use
                 # case of computing what copies are in a commit versus its parent (like
                 # during a rebase or histedit). Note, we exclude merge commits from this
                 # optimization, since the ctx.files() for a merge commit is not correct for
                 # this comparison.
                 missingmatcher = match
                 if b.p1() == a and b.p2().node() == node.nullid:
                     touched = matchmod.exact(b.files())
                     missingmatcher = matchmod.intersectmatchers(match, touched)
                 missing = _computeforwardmissing(a, b, match=missingmatcher)

                 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

                 if debug:
                     dbg(b'debug.copies:      missing files to search: %d\n' % len(missing))

                 for path in sorted(missing):
                     if debug:
                         dbg(b'debug.copies:        tracing file: %s\n' % path)
                     fctx = b[path]
                     fctx._ancestrycontext = ancestrycontext

                     if debug:
                         # only measured (and reported below) in debug mode
                         start = util.timer()
                     origin = _tracefile(fctx, am, basemf)
                     if origin:
                         if debug:
                             dbg(b'debug.copies:          rename of: %s\n' % origin)
                         found[path] = origin
                     if debug:
                         dbg(
                             b'debug.copies:          time: %f seconds\n'
                             % (util.timer() - start)
                         )
                 return found
             def _revinfo_getter(repo):
                 """return a function that returns multiple data given a <rev>

                 The returned `revinfo(rev)` callable produces a 6-tuple:

                 * p1: revision number of first parent
                 * p2: revision number of second parent
                 * p1copies: mapping of copies from p1
                 * p2copies: mapping of copies from p2
                 * removed: a list of removed files
                 * ismerged: a callback to know if file was merged in that revision
                 """
                 cl = repo.changelog
                 parents = cl.parentrevs

                 def get_ismerged(rev):
                     ctx = repo[rev]

                     def ismerged(path):
                         # a file the revision did not touch cannot have been merged
                         if path not in ctx.files():
                             return False
                         fctx = ctx[path]
                         parents = fctx._filelog.parents(fctx._filenode)
                         nb_parents = 0
                         for n in parents:
                             if n != node.nullid:
                                 nb_parents += 1
                         return nb_parents >= 2

                     return ismerged

                 changelogrevision = cl.changelogrevision

                 # A small cache to avoid doing the work twice for merges
                 #
                 # In the vast majority of cases, if we ask information for a revision
                 # about 1 parent, we'll later ask it for the other. So it make sense to
                 # keep the information around when reaching the first parent of a merge
                 # and dropping it after it was provided for the second parents.
                 #
                 # It exists cases were only one parent of the merge will be walked. It
                 # happens when the "destination" the copy tracing is descendant from a
                 # new root, not common with the "source". In that case, we will only walk
                 # through merge parents that are descendant of changesets common
                 # between "source" and "destination".
                 #
                 # With the current case implementation if such changesets have a copy
                 # information, we'll keep them in memory until the end of
                 # _changesetforwardcopies. We don't expect the case to be frequent
                 # enough to matters.
                 #
                 # In addition, it would be possible to reach pathological case, were
                 # many first parent are met before any second parent is reached. In
                 # that case the cache could grow. If this even become an issue one can
                 # safely introduce a maximum cache size. This would trade extra CPU/IO
                 # time to save memory.
                 merge_caches = {}

                 def revinfo(rev):
                     p1, p2 = parents(rev)
                     # second request for a merge: serve the cached value and drop it
                     e = merge_caches.pop(rev, None)
                     if e is not None:
                         return e
                     c = changelogrevision(rev)
                     p1copies = c.p1copies
                     p2copies = c.p2copies
                     removed = c.filesremoved
                     value = (p1, p2, p1copies, p2copies, removed, get_ismerged(rev))
                     if p1 != node.nullrev and p2 != node.nullrev:
                         # XXX some case we over cache, IGNORE
                         merge_caches[rev] = value
                     return value

                 return revinfo
             def _changesetforwardcopies(a, b, match):
                 """compute the copies from a to b out of changeset-level copy data

                 Returns an empty dict when a is the null revision or is b itself, or
                 when no revision outside the missing set is a parent of one inside it.
                 Otherwise the revisions to walk are handed to one of the
                 _combine_changeset_copies*() functions, picked from the repository's
                 filecopiesmode.
                 """
                 if a.rev() in (node.nullrev, b.rev()):
                     return {}

                 repo = a.repo().unfiltered()
                 cl = repo.changelog
                 isancestor = cl.isancestorrev  # XXX we should had chaching to this.
                 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
                 mrset = set(missingrevs)

                 # build the {parent: [children]} mapping and collect the roots: the
                 # parents of missing revisions that are not missing themselves
                 children = {}
                 roots = set()
                 for rev in missingrevs:
                     for parent in cl.parentrevs(rev):
                         if parent == node.nullrev:
                             continue
                         children.setdefault(parent, []).append(rev)
                         if parent not in mrset:
                             roots.add(parent)
                 if not roots:
                     # no common revision to track copies from
                     return {}

                 from_head = set(
                     cl.reachableroots(min(roots), [b.rev()], list(roots), includepath=True)
                 )
                 iterrevs = (from_head & mrset) | roots
                 # b is the target of the walk, not a revision to iterate from
                 iterrevs.remove(b.rev())
                 revs = sorted(iterrevs)

                 if repo.filecopiesmode == b'changeset-sidedata':
                     revinfo = _revinfo_getter(repo)
                     combine = _combine_changeset_copies
                 else:
                     revinfo = _revinfo_getter_extra(repo)
                     combine = _combine_changeset_copies_extra
                 return combine(revs, children, b.rev(), revinfo, match, isancestor)
             def _combine_changeset_copies(
                 revs, children, targetrev, revinfo, match, isancestor
             ):
                 """combine the copies information for each item of iterrevs

                 revs: sorted iterable of revision to visit
                 children: a {parent: [children]} mapping.
                 targetrev: the final copies destination revision (not in iterrevs)
                 revinfo(rev): a function that return
                               (p1, p2, p1copies, p2copies, removed, ismerged)
                 match: a matcher
                 isancestor: callable handed as is to _merge_copies_dict() when the
                             copies reaching a revision through both of its parents
                             have to be merged

                 It returns the aggregated copies information for `targetrev`, as a
                 {destination: source} dict.
                 """
                 # {rev: {dest: (rev that set the entry, source or None)}}
                 all_copies = {}
                 alwaysmatch = match.always()
                 for r in revs:
                     copies = all_copies.pop(r, None)
                     if copies is None:
                         # this is a root
                         copies = {}
                     for i, c in enumerate(children[r]):
                         p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
                         # pick the copy data of c relative to the parent we come from
                         if r == p1:
                             parent = 1
                             childcopies = p1copies
                         else:
                             assert r == p2
                             parent = 2
                             childcopies = p2copies
                         if not alwaysmatch:
                             childcopies = {
                                 dst: src for dst, src in childcopies.items() if match(dst)
                             }
                         newcopies = copies
                         if childcopies:
                             newcopies = copies.copy()
                             for dest, source in pycompat.iteritems(childcopies):
                                 # chain with a copy already known for the source, unless
                                 # that entry only records a removal (source is None)
                                 prev = copies.get(source)
                                 if prev is not None and prev[1] is not None:
                                     source = prev[1]
                                 newcopies[dest] = (c, source)
                             assert newcopies is not copies
                         for f in removed:
                             if f in newcopies:
                                 if newcopies is copies:
                                     # copy on write to avoid affecting potential other
                                     # branches.  when there are no other branches, this
                                     # could be avoided.
                                     newcopies = copies.copy()
                                 # keep the entry, but with no source: it is skipped when
                                 # building the final result
                                 newcopies[f] = (c, None)
                         othercopies = all_copies.get(c)
                         if othercopies is None:
                             all_copies[c] = newcopies
                         else:
                             # we are the second parent to work on c, we need to merge our
                             # work with the other.
                             #
                             # In case of conflict, parent 1 take precedence over parent 2.
                             # This is an arbitrary choice made anew when implementing
                             # changeset based copies. It was made without regards with
                             # potential filelog related behavior.
                             if parent == 1:
                                 _merge_copies_dict(
                                     othercopies, newcopies, isancestor, ismerged
                                 )
                             else:
                                 _merge_copies_dict(
                                     newcopies, othercopies, isancestor, ismerged
                                 )
                                 all_copies[c] = newcopies
                 # only entries that still have a source are real copies
                 final_copies = {}
                 for dest, (tt, source) in all_copies[targetrev].items():
                     if source is not None:
                         final_copies[dest] = source
                 return final_copies
             def _merge_copies_dict(minor, major, isancestor, ismerged):
                 """merge two copies-mapping together, minor and major
                 In case of conflict, value from "major" will be picked.
                 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
                                                     ancestors of `high_rev`,
                 - `ismerged(path)`: callable return True if `path` have been merged in the
                                     current revision,
                 """
                 for dest, value in major.items():
                     other = minor.get(dest)
                     if other is None:
                         minor[dest] = value
                     else:
                         new_tt = value[0]
                         other_tt = other[0]
                         if value[1] == other[1]:
                             continue
                         # content from "major" wins, unless it is older
                         # than the branch point or there is a merge
                         if (
                             new_tt == other_tt
                             or not isancestor(new_tt, other_tt)
                             or ismerged(dest)
                         ):
                             minor[dest] = value
             def _revinfo_getter_extra(repo):
                 """return a function that returns multiple data given a <rev>

                 The returned callable yields, in this order:

                 * p1: revision number of first parent
                 * p2: revision number of second parent
                 * p1copies: mapping of copies from p1
                 * p2copies: mapping of copies from p2
                 * removed: a list of removed files
                 * ismerged: a callback to know if file was merged in that revision
                 """
                 parentrevs = repo.changelog.parentrevs
                 def get_ismerged(rev):
                     # build the per-revision "was this path merged?" predicate
                     ctx = repo[rev]
                     def ismerged(path):
                         if path in ctx.files():
                             fctx = ctx[path]
                             fileparents = fctx._filelog.parents(fctx._filenode)
                             # a file counts as merged when both of its filelog
                             # parents are real (non-null) nodes
                             real = [n for n in fileparents if n != node.nullid]
                             return len(real) >= 2
                         return False
                     return ismerged
                 def revinfo(rev):
                     p1, p2 = parentrevs(rev)
                     ctx = repo[rev]
                     p1copies, p2copies = ctx._copies
                     removed = ctx.filesremoved()
                     ismerged = get_ismerged(rev)
                     return p1, p2, p1copies, p2copies, removed, ismerged
                 return revinfo
             def _combine_changeset_copies_extra(
                 revs, children, targetrev, revinfo, match, isancestor
             ):
                 """version of `_combine_changeset_copies` that works with the Google
                 specific "extra" based storage for copy information

                 - `revs`: revisions to walk, processed in the order given,
                 - `children`: mapping of a revision to the children to propagate to,
                 - `targetrev`: revision whose accumulated copies are returned,
                 - `revinfo(rev)`: callable returning
                   `(p1, p2, p1copies, p2copies, removed, ismerged)` for `rev`,
                 - `match`: matcher applied to copy destinations (skipped entirely when
                   `match.always()` is true),
                 - `isancestor`: callable forwarded to `_merge_copies_dict_extra`.

                 While walking, each revision maps to a dict of
                 `destination -> (rev, source)`; a source of None records a removal.
                 The result is a plain `destination -> source` dict for `targetrev`
                 with the removal markers dropped.
                 """
                 # rev -> {dest: (rev that introduced the information, source or None)}
                 all_copies = {}
                 alwaysmatch = match.always()
                 for r in revs:
                     # the entry for `r` is consumed here: it is no longer needed once
                     # it has been propagated to the children below
                     copies = all_copies.pop(r, None)
                     if copies is None:
                         # this is a root
                         copies = {}
                     for i, c in enumerate(children[r]):
                         p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
                         # figure out through which parent slot `r` reaches `c`
                         if r == p1:
                             parent = 1
                             childcopies = p1copies
                         else:
                             assert r == p2
                             parent = 2
                             childcopies = p2copies
                         if not alwaysmatch:
                             childcopies = {
                                 dst: src for dst, src in childcopies.items() if match(dst)
                             }
                         # `newcopies` aliases `copies` until something must change
                         newcopies = copies
                         if childcopies:
                             newcopies = copies.copy()
                             for dest, source in pycompat.iteritems(childcopies):
                                 # chain with an earlier copy of the source, unless
                                 # that earlier entry is a removal marker
                                 prev = copies.get(source)
                                 if prev is not None and prev[1] is not None:
                                     source = prev[1]
                                 newcopies[dest] = (c, source)
                             assert newcopies is not copies
                         for f in removed:
                             if f in newcopies:
                                 if newcopies is copies:
                                     # copy on write to avoid affecting potential other
                                     # branches.  when there are no other branches, this
                                     # could be avoided.
                                     newcopies = copies.copy()
                                 # keep the key, but flag the destination as removed
                                 newcopies[f] = (c, None)
                         othercopies = all_copies.get(c)
                         if othercopies is None:
                             all_copies[c] = newcopies
                         else:
                             # we are the second parent to work on c, we need to merge our
                             # work with the other.
                             #
                             # In case of conflict, parent 1 take precedence over parent 2.
                             # This is an arbitrary choice made anew when implementing
                             # changeset based copies. It was made without regards with
                             # potential filelog related behavior.
                             if parent == 1:
                                 # the dict already stored for `c` is updated in place
                                 _merge_copies_dict_extra(
                                     othercopies, newcopies, isancestor, ismerged
                                 )
                             else:
                                 # `newcopies` receives the merge and replaces the
                                 # stored dict
                                 _merge_copies_dict_extra(
                                     newcopies, othercopies, isancestor, ismerged
                                 )
                                 all_copies[c] = newcopies
                 # strip the revision information and the removal markers
                 final_copies = {}
                 for dest, (tt, source) in all_copies[targetrev].items():
                     if source is not None:
                         final_copies[dest] = source
                 return final_copies
             def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
                 """version of `_merge_copies_dict` that works with the Google
                 specific "extra" based storage for copy information

                 `minor` is updated in place with the entries of `major`. On conflict
                 the entry from `major` is used, except when it is older than the
                 branch point (per `isancestor`) and `ismerged(dest)` is false.
                 """
                 for dest, candidate in major.items():
                     current = minor.get(dest)
                     if current is not None:
                         if candidate[1] == current[1]:
                             # same source on both sides: nothing to decide
                             continue
                         candidate_rev, current_rev = candidate[0], current[0]
                         # content from "major" wins, unless it is older
                         # than the branch point or there is a merge
                         keep_current = (
                             candidate_rev != current_rev
                             and isancestor(candidate_rev, current_rev)
                             and not ismerged(dest)
                         )
                         if keep_current:
                             continue
                     minor[dest] = candidate
             def _forwardcopies(a, b, base=None, match=None):
                 """find {dst@b: src@a} copy mapping where a is an ancestor of b

                 `base` defaults to `a`. `match` is narrowed through the repository's
                 narrowmatch before being used.
                 """
                 base = a if base is None else base
                 match = a.repo().narrowmatch(match)
                 if b.rev() is not None:
                     # plain committed revision: nothing from the dirstate to add
                     return _committedforwardcopies(a, b, base, match)
                 # working copy: trace up to its first parent, then combine with
                 # the copies recorded in the dirstate
                 committed = _committedforwardcopies(a, b.p1(), base, match)
                 return _chain(committed, _dirstatecopies(b._repo, match))
             def _backwardrenames(a, b, match):
                 """find {src@b: dst@a} rename mapping by reversing copies from b to a

                 Returns an empty mapping when `experimental.copytrace` is `off`.
                 Sources still present in `a` are plain copies, not renames, and are
                 left out.
                 """
                 copytrace = a._repo.ui.config(b'experimental', b'copytrace')
                 if copytrace == b'off':
                     return {}
                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
                 # We don't want to pass in "match" here, since that would filter
                 # the destination by it. Since we're reversing the copies, we want
                 # to filter the source instead.
                 forward = _forwardcopies(b, a)
                 renames = {}
                 for dst, src in sorted(pycompat.iteritems(forward)):
                     if match and not match(src):
                         continue
                     # remove copies
                     if src not in a:
                         renames[src] = dst
                 return renames
             def pathcopies(x, y, match=None):
                 """find {dst@y: src@x} copy mapping for directed compare

                 Depending on how `x` and `y` relate, the copies are looked up in the
                 dirstate, traced forward, traced backward, or both (through the
                 ancestor returned by `y.ancestor(x)`). An empty mapping is returned
                 when `x == y` or when either context is falsy.
                 """
                 repo = x._repo
                 ui = repo.ui
                 debug = ui.debugflag and ui.configbool(b'devel', b'debug.copies')
                 if debug:
                     ui.debug(
                         b'debug.copies: searching copies from %s to %s\n' % (x, y)
                     )
                 if x == y or not x or not y:
                     return {}
                 if y.rev() is None and x == y.p1():
                     if debug:
                         ui.debug(b'debug.copies: search mode: dirstate\n')
                     # short-circuit to avoid issues with merge states
                     return _dirstatecopies(repo, match)
                 ancestor = y.ancestor(x)
                 if ancestor == x:
                     if debug:
                         ui.debug(b'debug.copies: search mode: forward\n')
                     found = _forwardcopies(x, y, match=match)
                 elif ancestor == y:
                     if debug:
                         ui.debug(b'debug.copies: search mode: backward\n')
                     found = _backwardrenames(x, y, match=match)
                 else:
                     if debug:
                         ui.debug(b'debug.copies: search mode: combined\n')
                     # `x` is only passed as base when there is a real common ancestor
                     base = x if ancestor.rev() != node.nullrev else None
                     backward = _backwardrenames(x, ancestor, match=match)
                     forward = _forwardcopies(ancestor, y, base, match=match)
                     found = _chain(backward, forward)
                 _filter(x, y, found)
                 return found
             def mergecopies(repo, c1, c2, base):
                 """
                 Finds moves and copies between context c1 and c2 that are relevant for
                 merging. 'base' will be used as the merge base.

                 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
                 files that were moved/ copied in one merge parent and modified in another.
                 For example:

                 o          ---> 4 another commit
                 |
                 |   o      ---> 3 commit that modifies a.txt
                 |  /
                 o /        ---> 2 commit that moves a.txt to b.txt
                 |/
                 o          ---> 1 merge base

                 If we try to rebase revision 3 on revision 4, since there is no a.txt in
                 revision 4, and if user have copytrace disabled, we prints the following
                 message:

                 ```other changed <file> which local deleted```

                 Returns a 3-tuple `(branch_copies, branch_copies, diverge)` where:

                 the two "branch_copies" are instances of branch_copies, one per side.

                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.

                 This function calls different copytracing algorithms based on config.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return branch_copies(), branch_copies(), {}
                 narrowmatch = c1.repo().narrowmatch()
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     dirstate_side = branch_copies(_dirstatecopies(repo, narrowmatch))
                     return dirstate_side, branch_copies(), {}
                 # Copy trace disabling is explicitly below the node == p1 logic above
                 # because the logic above is required for a simple copy to be kept across a
                 # rebase.
                 copytracing = repo.ui.config(b'experimental', b'copytrace')
                 if stringutil.parsebool(copytracing) is False:
                     # stringutil.parsebool() returns None when it is unable to parse the
                     # value, so we should rely on making sure copytracing is on such cases
                     return branch_copies(), branch_copies(), {}
                 if usechangesetcentricalgo(repo):
                     # The heuristics don't make sense when we need changeset-centric algos
                     return _fullcopytracing(repo, c1, c2, base)
                 if copytracing != b'heuristics':
                     return _fullcopytracing(repo, c1, c2, base)
                 # Do full copytracing if only non-public revisions are involved as
                 # that will be fast enough and will also cover the copies which could
                 # be missed by heuristics
                 if _isfullcopytraceable(repo, c1, base):
                     return _fullcopytracing(repo, c1, c2, base)
                 return _heuristicscopytracing(repo, c1, c2, base)
             def _isfullcopytraceable(repo, c1, base):
                 """Tell whether the full copytrace algorithm is cheap enough to use.

                 That is the case when both the source (c1, or its first parent when c1
                 is the working copy) and the base are mutable, i.e. non-public, and
                 the number of changesets from base to the source stays below
                 `experimental.copytrace.sourcecommitlimit`.
                 """
                 source = c1.p1() if c1.rev() is None else c1
                 if not (source.mutable() and base.mutable()):
                     return False
                 limit = repo.ui.configint(
                     b'experimental', b'copytrace.sourcecommitlimit'
                 )
                 span = repo.revs(b'%d::%d', base.rev(), source.rev())
                 return len(span) < limit
             def _checksinglesidecopies(
                 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
             ):
                 """Record what to do with `src`, copied/renamed on one side only.

                 `dsts1` are the destinations on the copying side, `m1` its manifest,
                 `m2` the manifest of the other side and `mb` the one of the base.
                 `c2` and `base` are the contexts used to check that the other side's
                 version of `src` is related to the base one.

                 `renamedelete` gains `src -> dsts1` when `src` is gone from both
                 sides; `copy` gains `dst -> src` for every destination when the other
                 side modified `src` (content or flags). Nothing is returned.
                 """
                 if src not in m2:
                     # deleted on side 2
                     if src not in m1:
                         # renamed on side 1, deleted on side 2
                         renamedelete[src] = dsts1
                     return
                 if src not in mb:
                     # Work around the "short-circuit to avoid issues with merge states"
                     # thing in pathcopies(): pathcopies(x, y) can return a copy where the
                     # destination doesn't exist in y.
                     return
                 content_changed = mb[src] != m2[src]
                 if content_changed and not _related(c2[src], base[src]):
                     return
                 if content_changed or mb.flags(src) != m2.flags(src):
                     # modified on side 2
                     copy.update((dst, src) for dst in dsts1)
             class branch_copies(object):
                 """Information about copies made on one side of a merge/graft.

                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.

                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.

                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.

                 "dirmove" is a mapping of detected source dir -> destination dir renames.
                 This is needed for handling changes to new files previously grafted into
                 renamed directories.
                 """
                 def __init__(
                     self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
                 ):
                     # every omitted mapping gets its own fresh dict
                     if copy is None:
                         copy = {}
                     if renamedelete is None:
                         renamedelete = {}
                     if dirmove is None:
                         dirmove = {}
                     if movewithdir is None:
                         movewithdir = {}
                     self.copy = copy
                     self.renamedelete = renamedelete
                     self.dirmove = dirmove
                     self.movewithdir = movewithdir
                 def __repr__(self):
                     template = '<branch_copies\n  copy=%r\n  renamedelete=%r\n  dirmove=%r\n  movewithdir=%r\n>'
                     fields = (
                         self.copy,
                         self.renamedelete,
                         self.dirmove,
                         self.movewithdir,
                     )
                     return template % fields
             def _fullcopytracing(repo, c1, c2, base):
                 """ The full copytracing algorithm which finds all the new files that were
                 added from merge base up to the top commit and for each file it checks if
                 this file was copied from another file.

                 This is pretty slow when a lot of changesets are involved but will track all
                 the copies.

                 Returns `(branch_copies1, branch_copies2, diverge)`: one branch_copies
                 instance per side, plus a mapping of source name -> sorted list of
                 destination names for sources renamed to disjoint destinations on the
                 two sides.
                 """
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 mb = base.manifest()
                 # {dst: src} copies made on each side since the base
                 copies1 = pathcopies(base, c1)
                 copies2 = pathcopies(base, c2)
                 if not (copies1 or copies2):
                     # no copy on either side: nothing to classify
                     return branch_copies(), branch_copies(), {}
                 # invert to {src: [dst, ...]} so each source can be examined once
                 inversecopies1 = {}
                 inversecopies2 = {}
                 for dst, src in copies1.items():
                     inversecopies1.setdefault(src, []).append(dst)
                 for dst, src in copies2.items():
                     inversecopies2.setdefault(src, []).append(dst)
                 copy1 = {}
                 copy2 = {}
                 diverge = {}
                 renamedelete1 = {}
                 renamedelete2 = {}
                 allsources = set(inversecopies1) | set(inversecopies2)
                 for src in allsources:
                     dsts1 = inversecopies1.get(src)
                     dsts2 = inversecopies2.get(src)
                     if dsts1 and dsts2:
                         # copied/renamed on both sides
                         if src not in m1 and src not in m2:
                             # renamed on both sides
                             dsts1 = set(dsts1)
                             dsts2 = set(dsts2)
                             # If there's some overlap in the rename destinations, we
                             # consider it not divergent. For example, if side 1 copies 'a'
                             # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                             # and 'd' and deletes 'a'.
                             if dsts1 & dsts2:
                                 for dst in dsts1 & dsts2:
                                     copy1[dst] = src
                                     copy2[dst] = src
                             else:
                                 diverge[src] = sorted(dsts1 | dsts2)
                         elif src in m1 and src in m2:
                             # copied on both sides
                             dsts1 = set(dsts1)
                             dsts2 = set(dsts2)
                             for dst in dsts1 & dsts2:
                                 copy1[dst] = src
                                 copy2[dst] = src
                         # TODO: Handle cases where it was renamed on one side and copied
                         # on the other side
                     elif dsts1:
                         # copied/renamed only on side 1
                         _checksinglesidecopies(
                             src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
                         )
                     elif dsts2:
                         # copied/renamed only on side 2
                         _checksinglesidecopies(
                             src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
                         )
                 # find interesting file sets from manifests
                 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
                 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
                 # files added on exactly one side
                 u1 = sorted(addedinm1 - addedinm2)
                 u2 = sorted(addedinm2 - addedinm1)
                 header = b"  unmatched files in %s"
                 if u1:
                     repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
                 if u2:
                     repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))
                 if repo.ui.debugflag:
                     # flatten the destination lists, only needed to annotate the
                     # debug output below
                     renamedeleteset = set()
                     divergeset = set()
                     for dsts in diverge.values():
                         divergeset.update(dsts)
                     for dsts in renamedelete1.values():
                         renamedeleteset.update(dsts)
                     for dsts in renamedelete2.values():
                         renamedeleteset.update(dsts)
                     repo.ui.debug(
                         b"  all copies found (* = to merge, ! = divergent, "
                         b"% = renamed and deleted):\n"
                     )
                     for side, copies in ((b"local", copies1), (b"remote", copies2)):
                         if not copies:
                             continue
                         repo.ui.debug(b"   on %s side:\n" % side)
                         for f in sorted(copies):
                             note = b""
                             if f in copy1 or f in copy2:
                                 note += b"*"
                             if f in divergeset:
                                 note += b"!"
                             if f in renamedeleteset:
                                 note += b"%"
                             repo.ui.debug(
                                 b"    src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                             )
                     del renamedeleteset
                     del divergeset
                 repo.ui.debug(b"  checking for directory renames\n")
                 # directory moves detected on one side decide which files added on
                 # the *other* side have to follow, hence the crossed u2/u1 arguments
                 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
                 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)
                 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
                 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
                 return branch_copies1, branch_copies2, diverge
             def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
                 """Finds moved directories and files that should move with them.

                 ctx: the context for one of the sides
                 copy: files copied on the same side (as ctx)
                 fullcopy: files copied on the same side (as ctx), including those that
                           merge.manifestmerge() won't care about
                 addedfiles: added files on the other side (compared to ctx)

                 Returns `(dirmove, movewithdir)`: the detected directory moves, keyed
                 and valued with a trailing "/", and the added files that must follow
                 them. Both are empty dicts when no directory move is detected.
                 """
                 # generate a directory move map
                 present_dirs = ctx.dirs()
                 rejected = set()
                 candidates = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in pycompat.iteritems(fullcopy):
                     srcdir = pathutil.dirname(src)
                     dstdir = pathutil.dirname(dst)
                     if srcdir in rejected:
                         # already seen to be uninteresting
                         continue
                     if srcdir in present_dirs and dstdir in present_dirs:
                         # directory wasn't entirely moved locally
                         rejected.add(srcdir)
                         continue
                     if candidates.get(srcdir, dstdir) != dstdir:
                         # files from the same directory moved to two different places
                         rejected.add(srcdir)
                         continue
                     # looks good so far
                     candidates[srcdir] = dstdir
                 for srcdir in rejected:
                     candidates.pop(srcdir, None)
                 if not candidates:
                     return {}, {}
                 dirmove = {
                     srcdir + b"/": dstdir + b"/"
                     for srcdir, dstdir in pycompat.iteritems(candidates)
                 }
                 for srcprefix, dstprefix in dirmove.items():
                     repo.ui.debug(
                         b"   discovered dir src: '%s' -> dst: '%s'\n"
                         % (srcprefix, dstprefix)
                     )
                 movewithdir = {}
                 # check unaccounted nonoverlapping files against directory moves
                 for f in addedfiles:
                     if f in fullcopy:
                         continue
                     for srcprefix, dstprefix in dirmove.items():
                         if not f.startswith(srcprefix):
                             continue
                         # new file added in a directory that was moved, move it
                         moved = dstprefix + f[len(srcprefix) :]
                         if moved not in copy:
                             movewithdir[f] = moved
                             repo.ui.debug(
                                 b"   pending file src: '%s' -> dst: '%s'\n"
                                 % (f, moved)
                             )
                         # only the first matching directory move is considered
                         break
                 return dirmove, movewithdir
             def _heuristicscopytracing(repo, c1, c2, base):
                 """ Fast copytracing using filename heuristics

                 Assumes that moves or renames are of following two types:

                 1) Inside a directory only (same directory name but different filenames)
                 2) Move from one directory to another
                    (same filenames but different directory names)

                 Works only when there are no merge commits in the "source branch".
                 Source branch is commits from base up to c2 not including base.

                 If merge is involved it fallbacks to _fullcopytracing().

                 Can be used by setting the following config:

                     [experimental]
                     copytrace = heuristics

                 In some cases the copy/move candidates found by heuristics can be very large
                 in number and that will make the algorithm slow. The number of possible
                 candidates to check can be limited by using the config
                 `experimental.copytrace.movecandidateslimit` which defaults to 100.

                 Returns `(branch_copies, branch_copies, {})`: the copies found for the
                 c1 side, the copies found for the c2 side, and an always-empty
                 mapping of divergent renames.
                 """
                 # the heuristics work on committed revisions only
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c2.rev() is None:
                     c2 = c2.p1()
                 changedfiles = set()
                 m1 = c1.manifest()
                 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
                     # If base is not in c2 branch, we switch to fullcopytracing
                     repo.ui.debug(
                         b"switching to full copytracing as base is not "
                         b"an ancestor of c2\n"
                     )
                     return _fullcopytracing(repo, c1, c2, base)
                 # collect every file touched between base (excluded) and c2
                 ctx = c2
                 while ctx != base:
                     if len(ctx.parents()) == 2:
                         # To keep things simple let's not handle merges
                         repo.ui.debug(b"switching to full copytracing because of merges\n")
                         return _fullcopytracing(repo, c1, c2, base)
                     changedfiles.update(ctx.files())
                     ctx = ctx.p1()
                 # copies made on the c2 side whose source still exists in c1
                 copies2 = {}
                 cp = _forwardcopies(base, c2)
                 for dst, src in pycompat.iteritems(cp):
                     if src in m1:
                         copies2[dst] = src
                 # file is missing if it isn't present in the destination, but is present in
                 # the base and present in the source.
                 # Presence in the base is important to exclude added files, presence in the
                 # source is important to exclude removed files.
                 missingfiles = [
                     f for f in changedfiles if f not in m1 and f in base and f in c2
                 ]
                 copies1 = {}
                 if missingfiles:
                     # index the files added in c1 by basename and by directory so
                     # that candidates can be looked up per missing file
                     basenametofilename = collections.defaultdict(list)
                     dirnametofilename = collections.defaultdict(list)
                     for f in m1.filesnotin(base.manifest()):
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         basenametofilename[basename].append(f)
                         dirnametofilename[dirname].append(f)
                     # we can have a lot of candidates which can slow down the heuristics
                     # config value to limit the number of candidates moves to check
                     # (the value does not depend on the file, so it is read once)
                     maxcandidates = repo.ui.configint(
                         b'experimental', b'copytrace.movecandidateslimit'
                     )
                     for f in missingfiles:
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         samebasename = basenametofilename[basename]
                         samedirname = dirnametofilename[dirname]
                         movecandidates = samebasename + samedirname
                         if len(movecandidates) > maxcandidates:
                             repo.ui.status(
                                 _(
                                     b"skipping copytracing for '%s', more "
                                     b"candidates than the limit: %d\n"
                                 )
                                 % (f, len(movecandidates))
                             )
                             continue
                         # f is guaranteed to be present in c2, that's why
                         # c2.filectx(f) won't fail
                         f2 = c2.filectx(f)
                         for candidate in movecandidates:
                             f1 = c1.filectx(candidate)
                             if _related(f1, f2):
                                 # if there are a few related copies then we'll merge
                                 # changes into all of them. This matches the behaviour
                                 # of upstream copytracing
                                 copies1[candidate] = f
                 return branch_copies(copies1), branch_copies(copies2), {}
             def _related(f1, f2):
                 """return True if f1 and f2 filectx have a common ancestor

                 Both histories are walked back in lockstep, always stepping the side
                 with the higher linkrev, until the two sides sit on the same linkrev;
                 the files are related if they are then the same file. Since
                 workingfilectx's rev() is None it messes up the integer comparison
                 logic, hence the pre-step check for None (f1 and f2 can only be
                 workingfilectx's initially).
                 """
                 if f1 == f2:
                     return True  # a match
                 walk1 = f1.ancestors()
                 walk2 = f2.ancestors()
                 try:
                     rev1 = f1.linkrev()
                     rev2 = f2.linkrev()
                     if rev1 is None:
                         f1 = next(walk1)
                     if rev2 is None:
                         f2 = next(walk2)
                     while True:
                         rev1 = f1.linkrev()
                         rev2 = f2.linkrev()
                         if rev1 > rev2:
                             f1 = next(walk1)
                             continue
                         if rev2 > rev1:
                             f2 = next(walk2)
                             continue
                         # f1 and f2 point to files in the same linkrev
                         return f1 == f2  # true if they point to the same file
                 except StopIteration:
                     # one history ran out before the two sides met
                     return False
             def graftcopies(wctx, ctx, base):
                 """reproduce copies between base and ctx in the wctx

                 Unlike mergecopies(), this function will only consider copies between base
                 and ctx; it will ignore copies between base and wctx. Also unlike
                 mergecopies(), this function will apply copies to the working copy (instead
                 of just returning information about the copies). That makes it cheaper
                 (especially in the common case of base==ctx.p1()) and useful also when
                 experimental.copytrace=off.

                 merge.update() will have already marked most copies, but it will only
                 mark copies if it thinks the source files are related (see
                 merge._related()). It will also not mark copies if the file wasn't modified
                 on the local side. This function adds the copies that were "missed"
                 by merge.update().
                 """
                 grafted = pathcopies(base, ctx)
                 # drop the copies that are not valid between the working copy
                 # parent and the working copy itself
                 wparent = wctx.p1()
                 _filter(wparent, wctx, grafted)
                 for dst, src in pycompat.iteritems(grafted):
                     wctx[dst].markcopied(src)