upstream/mercurial-mirror Commit - r46504:06b64fab

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import collections

10

import collections

11

import os

11

import os

12

13

from .i18n import _

13

from .i18n import _

14

15

16

from . import (

16

from . import (

17

match as matchmod,

17

match as matchmod,

18

node,

18

node,

19

pathutil,

19

pathutil,

20

pycompat,

20

pycompat,

21

util,

21

util,

22

)

22

)

23

24

25

from .utils import stringutil

25

from .utils import stringutil

26

27

from .revlogutils import flagutil

27

from .revlogutils import flagutil

28

29

30

def _filter(src, dst, t):

30

def _filter(src, dst, t):

31

"""filters out invalid copies after chaining"""

31

"""filters out invalid copies after chaining"""

32

33

# When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')

33

# When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')

34

# with copies in 'b' (from 'mid' to 'dst'), we can get the different cases

34

# with copies in 'b' (from 'mid' to 'dst'), we can get the different cases

35

# in the following table (not including trivial cases). For example, case 2

35

# in the following table (not including trivial cases). For example, case 2

36

# is where a file existed in 'src' and remained under that name in 'mid' and

36

# is where a file existed in 'src' and remained under that name in 'mid' and

37

# then was renamed between 'mid' and 'dst'.

37

# then was renamed between 'mid' and 'dst'.

38

#

38

#

39

# case src mid dst result

39

# case src mid dst result

40

# 1 x y - -

40

# 1 x y - -

41

# 2 x y y x->y

41

# 2 x y y x->y

42

# 3 x y x -

42

# 3 x y x -

43

# 4 x y z x->z

43

# 4 x y z x->z

44

# 5 - x y -

44

# 5 - x y -

45

# 6 x x y x->y

45

# 6 x x y x->y

46

#

46

#

47

# _chain() takes care of chaining the copies in 'a' and 'b', but it

47

# _chain() takes care of chaining the copies in 'a' and 'b', but it

48

# cannot tell the difference between cases 1 and 2, between 3 and 4, or

48

# cannot tell the difference between cases 1 and 2, between 3 and 4, or

49

# between 5 and 6, so it includes all cases in its result.

49

# between 5 and 6, so it includes all cases in its result.

50

# Cases 1, 3, and 5 are then removed by _filter().

50

# Cases 1, 3, and 5 are then removed by _filter().

51

52

for k, v in list(t.items()):

52

for k, v in list(t.items()):

53

# remove copies from files that didn't exist

53

# remove copies from files that didn't exist

54

if v not in src:

54

if v not in src:

55

del t[k]

55

del t[k]

56

# remove criss-crossed copies

56

# remove criss-crossed copies

57

elif k in src and v in dst:

57

elif k in src and v in dst:

58

del t[k]

58

del t[k]

59

# remove copies to files that were then removed

59

# remove copies to files that were then removed

60

elif k not in dst:

60

elif k not in dst:

61

del t[k]

61

del t[k]

62

63

64

def _chain(prefix, suffix):

64

def _chain(prefix, suffix):

65

"""chain two sets of copies 'prefix' and 'suffix'"""

65

"""chain two sets of copies 'prefix' and 'suffix'"""

66

result = prefix.copy()

66

result = prefix.copy()

67

for key, value in pycompat.iteritems(suffix):

67

for key, value in pycompat.iteritems(suffix):

68

result[key] = prefix.get(value, value)

68

result[key] = prefix.get(value, value)

69

return result

69

return result

70

71

72

def _tracefile(fctx, am, basemf):

72

def _tracefile(fctx, am, basemf):

73

"""return file context that is the ancestor of fctx present in ancestor

73

"""return file context that is the ancestor of fctx present in ancestor

74

manifest am

74

manifest am

75

76

Note: we used to try and stop after a given limit, however checking if that

76

Note: we used to try and stop after a given limit, however checking if that

77

limit is reached turned out to be very expensive. we are better off

77

limit is reached turned out to be very expensive. we are better off

78

disabling that feature."""

78

disabling that feature."""

79

80

for f in fctx.ancestors():

80

for f in fctx.ancestors():

81

path = f.path()

81

path = f.path()

82

if am.get(path, None) == f.filenode():

82

if am.get(path, None) == f.filenode():

83

return path

83

return path

84

if basemf and basemf.get(path, None) == f.filenode():

84

if basemf and basemf.get(path, None) == f.filenode():

85

return path

85

return path

86

87

88

def _dirstatecopies(repo, match=None):

88

def _dirstatecopies(repo, match=None):

89

ds = repo.dirstate

89

ds = repo.dirstate

90

c = ds.copies().copy()

90

c = ds.copies().copy()

91

for k in list(c):

91

for k in list(c):

92

if ds[k] not in b'anm' or (match and not match(k)):

92

if ds[k] not in b'anm' or (match and not match(k)):

93

del c[k]

93

del c[k]

94

return c

94

return c

95

96

97

def _computeforwardmissing(a, b, match=None):

97

def _computeforwardmissing(a, b, match=None):

98

"""Computes which files are in b but not a.

98

"""Computes which files are in b but not a.

99

This is its own function so extensions can easily wrap this call to see what

99

This is its own function so extensions can easily wrap this call to see what

100

files _forwardcopies is about to process.

100

files _forwardcopies is about to process.

101

"""

101

"""

102

ma = a.manifest()

102

ma = a.manifest()

103

mb = b.manifest()

103

mb = b.manifest()

104

return mb.filesnotin(ma, match=match)

104

return mb.filesnotin(ma, match=match)

105

106

107

def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms

    Returns True when ``repo.filecopiesmode`` is ``b'changeset-sidedata'``,
    or when the ``experimental.copies.read-from`` config value is
    ``b'changeset-only'`` or ``b'compatibility'``; False otherwise.
    """
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource

114

115

116

def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a ``{file in b: traced path}`` dict. When the changeset-centric
    algorithm is enabled the work is delegated to _changesetforwardcopies();
    otherwise every file present in ``b`` but not in ``a`` is traced back
    with _tracefile() against the manifests of ``a`` and ``base``.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    # NOTE(review): the debug message literals below are reproduced from a
    # whitespace-collapsed listing; confirm their inner spacing upstream.
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        # share one ancestry context between all the traced file contexts
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            # 'start' is always bound here: it is set under the same flag
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm

173

174

175

def _revinfo_getter(repo):
    """Return a function that returns the following data given a <rev>:

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object, or None when the revision is not
      flagged with REVIDX_HASCOPIESINFO
    """
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    # A small cache to avoid doing the work twice for merges
    #
    # In the vast majority of cases, if we ask information for a revision
    # about 1 parent, we'll later ask it for the other. So it makes sense to
    # keep the information around when reaching the first parent of a merge
    # and drop it after it was provided for the second parent.
    #
    # There are cases where only one parent of the merge will be walked. It
    # happens when the "destination" of the copy tracing is a descendant of a
    # new root, not common with the "source". In that case, we will only walk
    # through merge parents that are descendants of changesets common
    # between "source" and "destination".
    #
    # With the current implementation, if such changesets have copy
    # information, we'll keep them in memory until the end of
    # _changesetforwardcopies. We don't expect the case to be frequent
    # enough to matter.
    #
    # In addition, it would be possible to reach a pathological case, where
    # many first parents are met before any second parent is reached. In
    # that case the cache could grow. If this ever becomes an issue one can
    # safely introduce a maximum cache size. This would trade extra CPU/IO
    # time to save memory.
    merge_caches = {}

    def revinfo(rev):
        # An entry is served once and then dropped (see the note above).
        # Checking the cache first avoids a useless parents() lookup on a hit.
        cached = merge_caches.pop(rev, None)
        if cached is not None:
            return cached
        p1, p2 = parents(rev)
        changes = None
        if flags(rev) & HASCOPIESINFO:
            changes = changelogrevision(rev).changes
        value = (p1, p2, changes)
        if p1 != node.nullrev and p2 != node.nullrev:
            # XXX some case we over cache, IGNORE
            merge_caches[rev] = value
        return value

    return revinfo

231

232

233

def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor

    The returned callable takes ``(anc, desc)``. It answers without calling
    ``is_ancestor`` when ``anc > desc`` (False) or ``anc == desc`` (True);
    any other pair is forwarded to ``is_ancestor`` and the answer is
    remembered per ``(anc, desc)`` pair for the lifetime of the wrapper.
    """
    cache = {}

    def _is_ancestor(anc, desc):
        if anc > desc:
            return False
        elif anc == desc:
            return True
        key = (anc, desc)
        ret = cache.get(key)
        if ret is None:
            ret = cache[key] = is_ancestor(anc, desc)
        return ret

    return _is_ancestor

249

250

233

def _changesetforwardcopies(a, b, match):
    """Compute the copies from ``a`` to ``b`` using per-changeset copy data.

    Returns an empty dict when ``a`` is the null revision, when ``a`` and
    ``b`` are the same revision, or when no parent of the revisions missing
    from ``a`` lies outside that missing set. Otherwise the revisions to
    visit and a ``{parent: [children]}`` mapping are built, and the result
    of the combine function matching ``repo.filecopiesmode`` is returned.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
    isancestor = cached_is_ancestor(cl.isancestorrev)
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            children.setdefault(p, []).append(r)
            # a parent outside of the missing set is a starting point
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    # the target itself is not iterated over, only its ancestors are
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)

    if repo.filecopiesmode == b'changeset-sidedata':
        revinfo = _revinfo_getter(repo)
        return _combine_changeset_copies(
            revs, children, b.rev(), revinfo, match, isancestor
        )
    else:
        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )

280

298

281

299

282

def _combine_changeset_copies(
    revs, children, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that return (p1, p2, changes), where `changes`
                  is None or an object exposing `copied_from_p1`,
                  `copied_from_p2` and `removed`
    match: a matcher
    isancestor(low_rev, high_rev): ancestry test, forwarded to
                                   `_merge_copies_dict`

    It returns the aggregated copies information for `targetrev`.
    """
    # {rev: {dest: (rev that recorded the entry, source or None)}}
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for c in children[r]:
            p1, p2, changes = revinfo(c)
            childcopies = {}
            if r == p1:
                parent = 1
                if changes is not None:
                    childcopies = changes.copied_from_p1
            else:
                assert r == p2
                parent = 2
                if changes is not None:
                    childcopies = changes.copied_from_p2
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    # chain with a copy already known for the source, if any
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            if changes is not None:
                for f in changes.removed:
                    if f in newcopies:
                        if newcopies is copies:
                            # copy on write to avoid affecting potential other
                            # branches. when there are no other branches, this
                            # could be avoided.
                            newcopies = copies.copy()
                        # a None source marks the entry as removed in `c`
                        newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    # merged in place into the dict already stored for `c`
                    _merge_copies_dict(
                        othercopies, newcopies, isancestor, changes
                    )
                else:
                    _merge_copies_dict(
                        newcopies, othercopies, isancestor, changes
                    )
                    all_copies[c] = newcopies

    # drop the bookkeeping revision and the entries marked as removed
    return {
        dest: source
        for dest, (_tt, source) in all_copies[targetrev].items()
        if source is not None
    }

362

380

363

381

364

def _merge_copies_dict(minor, major, isancestor, changes):

382

def _merge_copies_dict(minor, major, isancestor, changes):

365

"""merge two copies-mapping together, minor and major

383

"""merge two copies-mapping together, minor and major

366

384

367

In case of conflict, value from "major" will be picked.

385

In case of conflict, value from "major" will be picked.

368

386

369

- `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an

387

- `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an

370

ancestors of `high_rev`,

388

ancestors of `high_rev`,

371

389

372

- `ismerged(path)`: callable return True if `path` have been merged in the

390

- `ismerged(path)`: callable return True if `path` have been merged in the

373

current revision,

391

current revision,

374

"""

392

"""

375

for dest, value in major.items():

393

for dest, value in major.items():

376

other = minor.get(dest)

394

other = minor.get(dest)

377

if other is None:

395

if other is None:

378

minor[dest] = value

396

minor[dest] = value

379

else:

397

else:

380

new_tt = value[0]

398

new_tt = value[0]

381

other_tt = other[0]

399

other_tt = other[0]

382

if value[1] == other[1]:

400

if value[1] == other[1]:

383

continue

401

continue

384

# content from "major" wins, unless it is older

402

# content from "major" wins, unless it is older

385

# than the branch point or there is a merge

403

# than the branch point or there is a merge

386

if new_tt == other_tt:

404

if new_tt == other_tt:

387

minor[dest] = value

405

minor[dest] = value

388

elif (

406

elif (

389

changes is not None

407

changes is not None

390

and value[1] is None

408

and value[1] is None

391

and dest in changes.salvaged

409

and dest in changes.salvaged

392

):

410

):

393

pass

411

pass

394

elif (

412

elif (

395

changes is not None

413

changes is not None

396

and other[1] is None

414

and other[1] is None

397

and dest in changes.salvaged

415

and dest in changes.salvaged

398

):

416

):

399

minor[dest] = value

417

minor[dest] = value

400

elif changes is not None and dest in changes.merged:

418

elif changes is not None and dest in changes.merged:

401

minor[dest] = value

419

minor[dest] = value

402

elif not isancestor(new_tt, other_tt):

420

elif not isancestor(new_tt, other_tt):

403

if value[1] is not None:

421

if value[1] is not None:

404

minor[dest] = value

422

minor[dest] = value

405

elif isancestor(other_tt, new_tt):

423

elif isancestor(other_tt, new_tt):

406

minor[dest] = value

424

minor[dest] = value

407

425

408

426

409

def _revinfo_getter_extra(repo):

427

def _revinfo_getter_extra(repo):

410

"""return a function that return multiple data given a <rev>"i

428

"""return a function that return multiple data given a <rev>"i

411

429

412

* p1: revision number of first parent

430

* p1: revision number of first parent

413

* p2: revision number of first parent

431

* p2: revision number of first parent

414

* p1copies: mapping of copies from p1

432

* p1copies: mapping of copies from p1

415

* p2copies: mapping of copies from p2

433

* p2copies: mapping of copies from p2

416

* removed: a list of removed files

434

* removed: a list of removed files

417

* ismerged: a callback to know if file was merged in that revision

435

* ismerged: a callback to know if file was merged in that revision

418

"""

436

"""

419

cl = repo.changelog

437

cl = repo.changelog

420

parents = cl.parentrevs

438

parents = cl.parentrevs

421

439

422

def get_ismerged(rev):

440

def get_ismerged(rev):

423

ctx = repo[rev]

441

ctx = repo[rev]

424

442

425

def ismerged(path):

443

def ismerged(path):

426

if path not in ctx.files():

444

if path not in ctx.files():

427

return False

445

return False

428

fctx = ctx[path]

446

fctx = ctx[path]

429

parents = fctx._filelog.parents(fctx._filenode)

447

parents = fctx._filelog.parents(fctx._filenode)

430

nb_parents = 0

448

nb_parents = 0

431

for n in parents:

449

for n in parents:

432

if n != node.nullid:

450

if n != node.nullid:

433

nb_parents += 1

451

nb_parents += 1

434

return nb_parents >= 2

452

return nb_parents >= 2

435

453

436

return ismerged

454

return ismerged

437

455

438

def revinfo(rev):

456

def revinfo(rev):

439

p1, p2 = parents(rev)

457

p1, p2 = parents(rev)

440

ctx = repo[rev]

458

ctx = repo[rev]

441

p1copies, p2copies = ctx._copies

459

p1copies, p2copies = ctx._copies

442

removed = ctx.filesremoved()

460

removed = ctx.filesremoved()

443

return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

461

return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

444

462

445

return revinfo

463

return revinfo

446

464

447

465

448

def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information

    revs: sorted iterable of revision to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision
    revinfo(rev): a function that return
                  (p1, p2, p1copies, p2copies, removed, ismerged)
    match: a matcher
    isancestor(low_rev, high_rev): ancestry test, forwarded to
                                   `_merge_copies_dict_extra`

    It returns the aggregated copies information for `targetrev`.
    """
    # {rev: {dest: (rev that recorded the entry, source or None)}}
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for c in children[r]:
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    # chain with a copy already known for the source, if any
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    # a None source marks the entry as removed in `c`
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    # merged in place into the dict already stored for `c`
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                    all_copies[c] = newcopies

    # drop the bookkeeping revision and the entries marked as removed
    return {
        dest: source
        for dest, (_tt, source) in all_copies[targetrev].items()
        if source is not None
    }

516

534

517

535

518

def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
    """Fold the entries of `major` into `minor`, updating `minor` in place.

    This is the version of `_merge_copies_dict` that works with the Google
    specific "extra" based storage for copy information.

    Both dictionaries map a destination to a `(tt, source)` pair. A
    destination only present in `major` is copied over as is. When both
    sides know the destination and disagree on the source, the entry from
    `major` replaces the one in `minor` unless its `tt` differs from the
    other one, `isancestor(major_tt, minor_tt)` holds and `ismerged(dest)`
    is false.

    Nothing is returned.
    """
    for dest, value in major.items():
        other = minor.get(dest)
        if other is not None:
            new_tt, other_tt = value[0], other[0]
            if value[1] == other[1]:
                # both sides agree on the source: keep what "minor" has
                continue
            # content from "minor" is only kept when the "major" entry is
            # older than the branch point and the file was not merged
            minor_wins = (
                new_tt != other_tt
                and isancestor(new_tt, other_tt)
                and not ismerged(dest)
            )
            if minor_wins:
                continue
        minor[dest] = value

538

556

539

557

540

def _forwardcopies(a, b, base=None, match=None):
    """Return the {dst@b: src@a} copy mapping, where a is an ancestor of b.

    `base` defaults to `a`. `match` is narrowed through the repository of
    `a` before being used. When `b` is the working copy (its `rev()` is
    None), the copies committed up to its first parent are chained with
    the copies recorded in the dirstate.
    """
    base = a if base is None else base
    match = a.repo().narrowmatch(match)

    if b.rev() is not None:
        return _committedforwardcopies(a, b, base, match)

    # working copy: combine committed copies with those from the dirstate
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))

554

572

555

573

556

def _backwardrenames(a, b, match):
    """Return the {src: dst} renames obtained by reversing copies from b to a.

    Returns an empty mapping when `experimental.copytrace` is `off`.
    Entries whose source is rejected by `match`, or whose source still
    exists in `a` (plain copies rather than renames), are dropped.
    """
    ui = a._repo.ui
    if ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though copies are not taken into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases one of
    # the renames is picked arbitrarily: the last destination in sorted
    # order overwrites the earlier ones.
    #
    # "match" is deliberately not passed to _forwardcopies(): that would
    # filter the destination, whereas reversing the copies requires
    # filtering on the source.
    forward = _forwardcopies(b, a)
    renames = {}
    for dst, src in sorted(pycompat.iteritems(forward)):
        if match and not match(src):
            continue
        # a source still present in "a" was copied, not renamed
        if src not in a:
            renames[src] = dst
    return renames

576

594

577

595

578

def pathcopies(x, y, match=None):

596

def pathcopies(x, y, match=None):

579

"""find {dst@y: src@x} copy mapping for directed compare"""

597

"""find {dst@y: src@x} copy mapping for directed compare"""

580

repo = x._repo

598

repo = x._repo

581

debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

599

debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')

582

if debug:

600

if debug:

583

repo.ui.debug(

601

repo.ui.debug(

584

b'debug.copies: searching copies from %s to %s\n' % (x, y)

602

b'debug.copies: searching copies from %s to %s\n' % (x, y)

585

)

603

)

586

if x == y or not x or not y:

604

if x == y or not x or not y:

587

return {}

605

return {}

588

if y.rev() is None and x == y.p1():

606

if y.rev() is None and x == y.p1():

589

if debug:

607

if debug:

590

repo.ui.debug(b'debug.copies: search mode: dirstate\n')

608

repo.ui.debug(b'debug.copies: search mode: dirstate\n')

591

# short-circuit to avoid issues with merge states

609

# short-circuit to avoid issues with merge states

592

return _dirstatecopies(repo, match)

610

return _dirstatecopies(repo, match)

593

a = y.ancestor(x)

611

a = y.ancestor(x)

594

if a == x:

612

if a == x:

595

if debug:

613

if debug:

596

repo.ui.debug(b'debug.copies: search mode: forward\n')

614

repo.ui.debug(b'debug.copies: search mode: forward\n')

597

copies = _forwardcopies(x, y, match=match)

615

copies = _forwardcopies(x, y, match=match)

598

elif a == y:

616

elif a == y:

599

if debug:

617

if debug:

600

repo.ui.debug(b'debug.copies: search mode: backward\n')

618

repo.ui.debug(b'debug.copies: search mode: backward\n')

601

copies = _backwardrenames(x, y, match=match)

619

copies = _backwardrenames(x, y, match=match)

602

else:

620

else:

603

if debug:

621

if debug:

604

repo.ui.debug(b'debug.copies: search mode: combined\n')

622

repo.ui.debug(b'debug.copies: search mode: combined\n')

605

base = None

623

base = None

606

if a.rev() != node.nullrev:

624

if a.rev() != node.nullrev:

607

base = x

625

base = x

608

copies = _chain(

626

copies = _chain(

609

_backwardrenames(x, a, match=match),

627

_backwardrenames(x, a, match=match),

610

_forwardcopies(a, y, base, match=match),

628

_forwardcopies(a, y, base, match=match),

611

)

629

)

612

_filter(x, y, copies)

630

_filter(x, y, copies)

613

return copies

631

return copies

614

632

615

633

616

def mergecopies(repo, c1, c2, base):
    """
    Find the moves and copies between contexts c1 and c2 that are relevant
    for merging. 'base' is used as the merge base.

    Copytracing is used by commands such as rebase, merge and unshelve to
    merge files that were moved/copied in one merge parent and modified in
    the other. For example:

      o          ---> 4 another commit
      |
      |   o      ---> 3 commit that modifies a.txt
      |  /
      o /        ---> 2 commit that moves a.txt to b.txt
      |/
      o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, there is no a.txt in
    revision 4, so when the user has copytrace disabled the following
    message is printed:

        ```other changed <file> which local deleted```

    Returns a tuple of three items:

    two "branch_copies" instances, and

    "diverge", a mapping of source name -> list of destination names
    for divergent renames.

    The copytracing algorithm that gets called depends on the config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    wdir_on_p1 = c2.node() is None and c1.node() == repo.dirstate.p1()
    if wdir_on_p1:
        dirstate_copies = _dirstatecopies(repo, narrowmatch)
        return branch_copies(dirstate_copies), branch_copies(), {}

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept
    # across a rebase.
    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse
        # the value, so copytracing stays on for any unrecognized value
        return branch_copies(), branch_copies(), {}

    # The heuristics don't make sense when changeset-centric algos are
    # needed. With heuristics enabled, full copytracing is still preferred
    # when only non-public revisions are involved, as that will be fast
    # enough and will also cover the copies the heuristics could miss.
    use_heuristics = (
        not usechangesetcentricalgo(repo)
        and copytracing == b'heuristics'
        and not _isfullcopytraceable(repo, c1, base)
    )
    if use_heuristics:
        return _heuristicscopytracing(repo, c1, c2, base)
    return _fullcopytracing(repo, c1, c2, base)

684

702

685

703

686

def _isfullcopytraceable(repo, c1, base):
    """Tell whether the full copytrace algorithm is cheap enough to use.

    That is the case when both `c1` (or its first parent when `c1` is the
    working copy) and `base` are mutable, and the number of changesets in
    `base::c1` is below the limit.

    `experimental.copytrace.sourcecommitlimit` sets that limit: once the
    number of changesets from c1 to base reaches it, the full copytracing
    algorithm won't be used.
    """
    if c1.rev() is None:
        # working copy: reason about its first parent instead
        c1 = c1.p1()

    if not (c1.mutable() and base.mutable()):
        return False

    limit = repo.ui.configint(b'experimental', b'copytrace.sourcecommitlimit')
    revs = repo.revs(b'%d::%d', base.rev(), c1.rev())
    return len(revs) < limit

704

722

705

723

706

def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    """Classify a source that was copied/renamed on one side only.

    `dsts1` lists the destinations of `src` on the side that copied it,
    `m1` is the manifest of that side, `m2` and `c2` are the manifest and
    context of the other side, `mb` and `base` those of the merge base.

    The result is recorded in place: `renamedelete[src]` is set when the
    source is gone from both `m1` and `m2`, and `copy[dst] = src` is set
    for every destination when the other side changed the source content
    or flags. Nothing is returned.
    """
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
        return

    if src not in mb:
        # Work around the "short-circuit to avoid issues with merge states"
        # thing in pathcopies(): pathcopies(x, y) can return a copy where
        # the destination doesn't exist in y.
        return

    content_differs = mb[src] != m2[src]
    if content_differs and not _related(c2[src], base[src]):
        return

    if content_differs or mb.flags(src) != m2.flags(src):
        # modified on side 2
        for dst in dsts1:
            copy[dst] = src

725

743

726

744

727

class branch_copies(object):
    """Copy information gathered for one side of a merge/graft.

    "copy" maps destination name -> source name, where the source is in
    c1 and the destination is in c2 or vice-versa.

    "movewithdir" maps source name -> destination name, for a file at
    source that is present in one context but not the other and needs to
    be moved to destination by the merge process, because the other
    context moved the directory it is in.

    "renamedelete" maps source name -> list of destination names for
    files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" maps detected source dir -> destination dir renames. This
    is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # every omitted mapping gets its own fresh dict; a mapping that is
        # passed in is stored as is, not copied
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        fields = (self.copy, self.renamedelete, self.dirmove, self.movewithdir)
        template = '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>'
        return template % fields

759

777

760

778

761

def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 3-tuple `(branch_copies1, branch_copies2, diverge)`: one
    `branch_copies` instance built from the copies found between `base` and
    `c1`, one built from those between `base` and `c2`, and a mapping of
    source name -> sorted list of destination names for sources that both
    sides renamed to disjoint sets of destinations.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # {dst: src} copies on each side, relative to the merge base
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        # no copy on either side, nothing to trace
        return branch_copies(), branch_copies(), {}

    # invert the mappings: {src: [dst, ...]} for each side
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # only destinations shared by both sides are recorded
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    # u1/u2: sorted files reported for one side only (not for both)
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    if repo.ui.debugflag:
        # flatten the destination lists so each copy can be annotated below
        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b" on %s side:\n" % side)
            for f in sorted(copies):
                # build the marker suffix described in the legend above
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    # Each call pairs one side's copies with the files added on the *other*
    # side, so the "movewithdir" result it returns belongs to that other side.
    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge

884

902

885

903

886

def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
    """Find moved directories and the files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
              merge.manifestmerge() won't care about
    addedfiles: added files on the other side (compared to ctx)

    Returns a `(dirmove, movewithdir)` pair of dicts. `dirmove` maps a
    source directory to its destination directory, both with a trailing
    slash; `movewithdir` maps an added file to the path it should get
    inside the destination directory. Both are empty when no directory
    move is detected.
    """
    # generate a directory move map
    known_dirs = ctx.dirs()
    rejected = set()
    candidates = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        srcdir = pathutil.dirname(src)
        dstdir = pathutil.dirname(dst)
        if srcdir in rejected:
            # already seen to be uninteresting
            continue
        if srcdir in known_dirs and dstdir in known_dirs:
            # directory wasn't entirely moved locally
            rejected.add(srcdir)
        elif srcdir in candidates and candidates[srcdir] != dstdir:
            # files from the same directory moved to two different places
            rejected.add(srcdir)
        else:
            # looks good so far
            candidates[srcdir] = dstdir

    # a directory rejected late may already have been recorded: drop it
    for srcdir in rejected:
        candidates.pop(srcdir, None)

    if not candidates:
        return {}, {}

    dirmove = {
        srcdir + b"/": dstdir + b"/"
        for srcdir, dstdir in pycompat.iteritems(candidates)
    }

    for srcdir in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n"
            % (srcdir, dirmove[srcdir])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfiles:
        if f in fullcopy:
            continue
        for srcdir in dirmove:
            if not f.startswith(srcdir):
                continue
            # new file added in a directory that was moved, move it
            moved = dirmove[srcdir] + f[len(srcdir) :]
            if moved not in copy:
                movewithdir[f] = moved
                repo.ui.debug(
                    b" pending file src: '%s' -> dst: '%s'\n" % (f, moved)
                )
            # only the first matching directory is considered
            break

    return dirmove, movewithdir

949

967

950

968

951

def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
                    (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns either the result of _fullcopytracing() or the 3-tuple
    ``(branch_copies(copies1), branch_copies(copies2), {})``.
    """

    # a context without a revision number is replaced by its first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # walk first parents from c2 down to base, collecting touched files
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # copies recorded between base and c2 whose source still exists in c1
    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    copies1 = {}
    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        # index the files that c1 has and base lacks by basename and dirname
        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (the value does not depend on the file, so it is read only once)
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}

1058

1076

1059

1077

1060

def _related(f1, f2):

1078

def _related(f1, f2):

1061

"""return True if f1 and f2 filectx have a common ancestor

1079

"""return True if f1 and f2 filectx have a common ancestor

1062

1080

1063

Walk back to common ancestor to see if the two files originate

1081

Walk back to common ancestor to see if the two files originate

1064

from the same file. Since workingfilectx's rev() is None it messes

1082

from the same file. Since workingfilectx's rev() is None it messes

1065

up the integer comparison logic, hence the pre-step check for

1083

up the integer comparison logic, hence the pre-step check for

1066

None (f1 and f2 can only be workingfilectx's initially).

1084

None (f1 and f2 can only be workingfilectx's initially).

1067

"""

1085

"""

1068

1086

1069

if f1 == f2:

1087

if f1 == f2:

1070

return True # a match

1088

return True # a match

1071

1089

1072

g1, g2 = f1.ancestors(), f2.ancestors()

1090

g1, g2 = f1.ancestors(), f2.ancestors()

1073

try:

1091

try:

1074

f1r, f2r = f1.linkrev(), f2.linkrev()

1092

f1r, f2r = f1.linkrev(), f2.linkrev()

1075

1093

1076

if f1r is None:

1094

if f1r is None:

1077

f1 = next(g1)

1095

f1 = next(g1)

1078

if f2r is None:

1096

if f2r is None:

1079

f2 = next(g2)

1097

f2 = next(g2)

1080

1098

1081

while True:

1099

while True:

1082

f1r, f2r = f1.linkrev(), f2.linkrev()

1100

f1r, f2r = f1.linkrev(), f2.linkrev()

1083

if f1r > f2r:

1101

if f1r > f2r:

1084

f1 = next(g1)

1102

f1 = next(g1)

1085

elif f2r > f1r:

1103

elif f2r > f1r:

1086

f2 = next(g2)

1104

f2 = next(g2)

1087

else: # f1 and f2 point to files in the same linkrev

1105

else: # f1 and f2 point to files in the same linkrev

1088

return f1 == f2 # true if they point to the same file

1106

return f1 == f2 # true if they point to the same file

1089

except StopIteration:

1107

except StopIteration:

1090

return False

1108

return False

1091

1109

1092

1110

1093

def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    new_copies = pathcopies(base, ctx)
    # prune, in place, the entries that are invalid between wctx.p1() and wctx
    _filter(wctx.p1(), wctx, new_copies)
    for dst, src in pycompat.iteritems(new_copies):
        wctx[dst].markcopied(src)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import collections
             import os
             from .i18n import _
             from . import (
                 match as matchmod,
                 node,
                 pathutil,
                 pycompat,
                 util,
             )
             from .utils import stringutil
             from .revlogutils import flagutil
             def _filter(src, dst, t):
                 """filters out invalid copies after chaining"""
                 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
                 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
                 # in the following table (not including trivial cases). For example, case 2
                 # is where a file existed in 'src' and remained under that name in 'mid' and
                 # then was renamed between 'mid' and 'dst'.
                 #
                 # case src mid dst result
                 #   1   x   y   -    -
                 #   2   x   y   y   x->y
                 #   3   x   y   x    -
                 #   4   x   y   z   x->z
                 #   5   -   x   y    -
                 #   6   x   x   y   x->y
                 #
                 # _chain() takes care of chaining the copies in 'a' and 'b', but it
                 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
                 # between 5 and 6, so it includes all cases in its result.
                 # Cases 1, 3, and 5 are then removed by _filter().
                 for k, v in list(t.items()):
                     # remove copies from files that didn't exist
                     if v not in src:
                         del t[k]
                     # remove criss-crossed copies
                     elif k in src and v in dst:
                         del t[k]
                     # remove copies to files that were then removed
                     elif k not in dst:
                         del t[k]
             def _chain(prefix, suffix):
                 """chain two sets of copies 'prefix' and 'suffix'

                 Returns a new mapping: a copy of `prefix` in which every destination
                 of `suffix` points at the source `prefix` recorded for its own source,
                 or at that source unchanged when `prefix` has no entry for it.
                 """
                 chained = prefix.copy()
                 # lookups go through `prefix`, never through the mapping being built
                 chained.update(
                     (dest, prefix.get(origin, origin))
                     for dest, origin in pycompat.iteritems(suffix)
                 )
                 return chained
             def _tracefile(fctx, am, basemf):
                 """return file context that is the ancestor of fctx present in ancestor
                 manifest am
                 Note: we used to try and stop after a given limit, however checking if that
                 limit is reached turned out to be very expensive. we are better off
                 disabling that feature."""
                 for f in fctx.ancestors():
                     path = f.path()
                     if am.get(path, None) == f.filenode():
                         return path
                     if basemf and basemf.get(path, None) == f.filenode():
                         return path
             def _dirstatecopies(repo, match=None):
                 ds = repo.dirstate
                 c = ds.copies().copy()
                 for k in list(c):
                     if ds[k] not in b'anm' or (match and not match(k)):
                         del c[k]
                 return c
             def _computeforwardmissing(a, b, match=None):
                 """Computes which files are in b but not a.
                 This is its own function so extensions can easily wrap this call to see what
                 files _forwardcopies is about to process.
                 """
                 ma = a.manifest()
                 mb = b.manifest()
                 return mb.filesnotin(ma, match=match)
             def usechangesetcentricalgo(repo):
                 """Checks if we should use changeset-centric copy algorithms

                 True when repo.filecopiesmode is b'changeset-sidedata', or when the
                 `experimental.copies.read-from` config is b'changeset-only' or
                 b'compatibility'. The config is only read in the second case.
                 """
                 return repo.filecopiesmode == b'changeset-sidedata' or repo.ui.config(
                     b'experimental', b'copies.read-from'
                 ) in (b'changeset-only', b'compatibility')
             def _committedforwardcopies(a, b, base, match):
                 """Like _forwardcopies(), but b.rev() cannot be None (working copy)

                 Returns a {destination: source} dict. When the changeset-centric
                 algorithm is enabled the work is delegated to
                 _changesetforwardcopies(); otherwise every file reported by
                 _computeforwardmissing() is traced back with _tracefile().
                 """
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 repo = a._repo
                 if usechangesetcentricalgo(repo):
                     return _changesetforwardcopies(a, b, match)

                 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
                 dbg = repo.ui.debug
                 if debug:
                     dbg(b'debug.copies:    looking into rename from %s to %s\n' % (a, b))

                 am = a.manifest()
                 if base is None:
                     basemf = None
                 else:
                     basemf = base.manifest()

                 # Only the origin of new files is searched for. Finding where old
                 # files went is too expensive, which means a case like
                 # 'hg rm b; hg cp a b' can be missed.

                 # Computing the forward missing compares entire manifests and is
                 # expensive on large ones. For the common case of a commit compared
                 # to its parent (rebase, histedit) the search is narrowed to
                 # b.files(). Merge commits are excluded from this shortcut because
                 # their ctx.files() is not correct for this comparison.
                 forwardmissingmatch = match
                 if b.p1() == a and b.p2().node() == node.nullid:
                     forwardmissingmatch = matchmod.intersectmatchers(
                         match, matchmod.exact(b.files())
                     )
                 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

                 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

                 if debug:
                     dbg(b'debug.copies:      missing files to search: %d\n' % len(missing))

                 copies = {}
                 for f in sorted(missing):
                     if debug:
                         dbg(b'debug.copies:        tracing file: %s\n' % f)
                     fctx = b[f]
                     fctx._ancestrycontext = ancestrycontext

                     if not debug:
                         # quiet path: trace and record, nothing to report
                         origin = _tracefile(fctx, am, basemf)
                         if origin:
                             copies[f] = origin
                         continue

                     start = util.timer()
                     origin = _tracefile(fctx, am, basemf)
                     if origin:
                         dbg(b'debug.copies:          rename of: %s\n' % origin)
                         copies[f] = origin
                     dbg(
                         b'debug.copies:          time: %f seconds\n'
                         % (util.timer() - start)
                     )
                 return copies
             def _revinfo_getter(repo):
                 """return a function that returns the following data given a <rev>

                 * p1: revision number of first parent
                 * p2: revision number of second parent
                 * changes: a ChangingFiles object, or None when the revision is not
                   flagged with REVIDX_HASCOPIESINFO
                 """
                 cl = repo.changelog
                 parents = cl.parentrevs
                 flags = cl.flags

                 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

                 changelogrevision = cl.changelogrevision

                 # A small cache to avoid doing the work twice for merges
                 #
                 # In the vast majority of cases, if we ask information for a revision
                 # about 1 parent, we'll later ask it for the other. So it makes sense
                 # to keep the information around when reaching the first parent of a
                 # merge and to drop it after it was provided for the second parent.
                 #
                 # There are cases where only one parent of the merge will be walked.
                 # It happens when the "destination" of the copy tracing is descendant
                 # from a new root, not common with the "source". In that case, we will
                 # only walk through merge parents that are descendant of changesets
                 # common between "source" and "destination".
                 #
                 # With the current implementation, if such changesets have copy
                 # information, we'll keep them in memory until the end of
                 # _changesetforwardcopies. We don't expect the case to be frequent
                 # enough to matter.
                 #
                 # In addition, it would be possible to reach a pathological case,
                 # where many first parents are met before any second parent is
                 # reached. In that case the cache could grow. If this ever becomes an
                 # issue one can safely introduce a maximum cache size. This would
                 # trade extra CPU/IO time to save memory.
                 merge_caches = {}

                 def revinfo(rev):
                     # a cached entry is handed out once, then forgotten
                     cached = merge_caches.pop(rev, None)
                     if cached is not None:
                         return cached
                     p1, p2 = parents(rev)
                     changes = None
                     if flags(rev) & HASCOPIESINFO:
                         changes = changelogrevision(rev).changes
                     value = (p1, p2, changes)
                     if p1 != node.nullrev and p2 != node.nullrev:
                         # XXX some case we over cache, IGNORE
                         merge_caches[rev] = value
                     return value

                 return revinfo
+            def cached_is_ancestor(is_ancestor):
+                """return a cached version of is_ancestor"""
+                cache = {}
+                def _is_ancestor(anc, desc):
+                    if anc > desc:
+                        return False
+                    elif anc == desc:
+                        return True
+                    key = (anc, desc)
+                    ret = cache.get(key)
+                    if ret is None:
+                        ret = cache[key] = is_ancestor(anc, desc)
+                    return ret
+                return _is_ancestor
             def _changesetforwardcopies(a, b, match):
                 """compute the copies from `a` to `b` with the changeset-centric data

                 Returns an empty dict when `a` is the null revision, when `a` and `b`
                 are the same revision, or when there is no common revision to track
                 copies from. Otherwise returns what _combine_changeset_copies()
                 produces (or its "extra" variant when repo.filecopiesmode is not
                 b'changeset-sidedata').
                 """
                 if a.rev() in (node.nullrev, b.rev()):
                     return {}

                 repo = a.repo().unfiltered()
                 cl = repo.changelog
                 # ancestry answers are memoized for the duration of this call
                 isancestor = cached_is_ancestor(cl.isancestorrev)
                 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
                 mrset = set(missingrevs)

                 # children: {parent rev: [missing child revs]}
                 # roots: parents of missing revisions that are not missing themselves
                 children = {}
                 roots = set()
                 for r in missingrevs:
                     for p in cl.parentrevs(r):
                         if p == node.nullrev:
                             continue
                         children.setdefault(p, []).append(r)
                         if p not in mrset:
                             roots.add(p)
                 if not roots:
                     # no common revision to track copies from
                     return {}

                 min_root = min(roots)
                 from_head = set(
                     cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
                 )

                 # visit the missing revisions reported by reachableroots() plus the
                 # roots; b itself is the target and is not iterated over
                 iterrevs = (from_head & mrset) | roots
                 iterrevs.remove(b.rev())
                 revs = sorted(iterrevs)

                 if repo.filecopiesmode == b'changeset-sidedata':
                     revinfo = _revinfo_getter(repo)
                     return _combine_changeset_copies(
                         revs, children, b.rev(), revinfo, match, isancestor
                     )
                 else:
                     revinfo = _revinfo_getter_extra(repo)
                     return _combine_changeset_copies_extra(
                         revs, children, b.rev(), revinfo, match, isancestor
                     )
             def _combine_changeset_copies(
                 revs, children, targetrev, revinfo, match, isancestor
             ):
                 """combine the copies information for each item of revs

                 revs: sorted iterable of revision to visit
                 children: a {parent: [children]} mapping.
                 targetrev: the final copies destination revision (not in revs)
                 revinfo(rev): a function that return (p1, p2, changes); `changes` may
                               be None, otherwise its `copied_from_p1`,
                               `copied_from_p2` and `removed` attributes are read here
                 match: a matcher
                 isancestor: passed through to _merge_copies_dict()

                 It returns the aggregated copies information for `targetrev` as a
                 {destination: source} dict.
                 """
                 # {rev: {dest: (rev that set the entry, source or None)}}
                 all_copies = {}
                 alwaysmatch = match.always()
                 for r in revs:
                     copies = all_copies.pop(r, None)
                     if copies is None:
                         # this is a root
                         copies = {}
                     for i, c in enumerate(children[r]):
                         p1, p2, changes = revinfo(c)
                         childcopies = {}
                         # pick the copy data of `c` relative to the parent `r`
                         if r == p1:
                             parent = 1
                             if changes is not None:
                                 childcopies = changes.copied_from_p1
                         else:
                             assert r == p2
                             parent = 2
                             if changes is not None:
                                 childcopies = changes.copied_from_p2
                         if not alwaysmatch:
                             childcopies = {
                                 dst: src for dst, src in childcopies.items() if match(dst)
                             }
                         # `copies` is shared until something has to be written
                         newcopies = copies
                         if childcopies:
                             newcopies = copies.copy()
                             for dest, source in pycompat.iteritems(childcopies):
                                 # chain through a copy already known for `source`
                                 prev = copies.get(source)
                                 if prev is not None and prev[1] is not None:
                                     source = prev[1]
                                 newcopies[dest] = (c, source)
                             assert newcopies is not copies
                         if changes is not None:
                             # a removed file keeps its entry, with a None source
                             for f in changes.removed:
                                 if f in newcopies:
                                     if newcopies is copies:
                                         # copy on write to avoid affecting potential other
                                         # branches.  when there are no other branches, this
                                         # could be avoided.
                                         newcopies = copies.copy()
                                     newcopies[f] = (c, None)
                         othercopies = all_copies.get(c)
                         if othercopies is None:
                             all_copies[c] = newcopies
                         else:
                             # we are the second parent to work on c, we need to merge our
                             # work with the other.
                             #
                             # In case of conflict, parent 1 take precedence over parent 2.
                             # This is an arbitrary choice made anew when implementing
                             # changeset based copies. It was made without regards with
                             # potential filelog related behavior.
                             if parent == 1:
                                 # merged in place into the dict already stored for `c`
                                 _merge_copies_dict(
                                     othercopies, newcopies, isancestor, changes
                                 )
                             else:
                                 # merged in place into `newcopies`, which is then stored
                                 _merge_copies_dict(
                                     newcopies, othercopies, isancestor, changes
                                 )
                                 all_copies[c] = newcopies
                 # entries whose source is None are left out of the result
                 final_copies = {}
                 for dest, (tt, source) in all_copies[targetrev].items():
                     if source is not None:
                         final_copies[dest] = source
                 return final_copies
             def _merge_copies_dict(minor, major, isancestor, changes):
                 """merge two copies-mapping together, minor and major
                 In case of conflict, value from "major" will be picked.
                 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
                                                     ancestors of `high_rev`,
                 - `ismerged(path)`: callable return True if `path` have been merged in the
                                     current revision,
                 """
                 for dest, value in major.items():
                     other = minor.get(dest)
                     if other is None:
                         minor[dest] = value
                     else:
                         new_tt = value[0]
                         other_tt = other[0]
                         if value[1] == other[1]:
                             continue
                         # content from "major" wins, unless it is older
                         # than the branch point or there is a merge
                         if new_tt == other_tt:
                             minor[dest] = value
                         elif (
                             changes is not None
                             and value[1] is None
                             and dest in changes.salvaged
                         ):
                             pass
                         elif (
                             changes is not None
                             and other[1] is None
                             and dest in changes.salvaged
                         ):
                             minor[dest] = value
                         elif changes is not None and dest in changes.merged:
                             minor[dest] = value
                         elif not isancestor(new_tt, other_tt):
                             if value[1] is not None:
                                 minor[dest] = value
                             elif isancestor(other_tt, new_tt):
                                 minor[dest] = value
             def _revinfo_getter_extra(repo):
                 """return a function that return multiple data given a <rev>"i
                 * p1: revision number of first parent
                 * p2: revision number of first parent
                 * p1copies: mapping of copies from p1
                 * p2copies: mapping of copies from p2
                 * removed: a list of removed files
                 * ismerged: a callback to know if file was merged in that revision
                 """
                 cl = repo.changelog
                 parents = cl.parentrevs
                 def get_ismerged(rev):
                     ctx = repo[rev]
                     def ismerged(path):
                         if path not in ctx.files():
                             return False
                         fctx = ctx[path]
                         parents = fctx._filelog.parents(fctx._filenode)
                         nb_parents = 0
                         for n in parents:
                             if n != node.nullid:
                                 nb_parents += 1
                         return nb_parents >= 2
                     return ismerged
                 def revinfo(rev):
                     p1, p2 = parents(rev)
                     ctx = repo[rev]
                     p1copies, p2copies = ctx._copies
                     removed = ctx.filesremoved()
                     return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
                 return revinfo
             def _combine_changeset_copies_extra(
                 revs, children, targetrev, revinfo, match, isancestor
             ):
                 """version of `_combine_changeset_copies` that works with the Google
                 specific "extra" based storage for copy information

                 Walk `revs` in the given order, push the copy mapping known for each
                 revision to the revisions listed in `children[rev]`, and return the
                 {destination: source} mapping accumulated for `targetrev`.

                 * revs: iterable of revision numbers to process (presumably ordered so
                   that parents come before their children -- confirm with the caller)
                 * children: mapping rev -> list of children revisions to update
                 * targetrev: revision whose final copy mapping is returned
                 * revinfo: callable returning (p1, p2, p1copies, p2copies, removed,
                   ismerged) for a revision
                 * match: matcher restricting the destinations taken into account
                 * isancestor: callable forwarded to `_merge_copies_dict_extra`

                 Internally every mapping value is a `(rev, source)` pair, where `rev`
                 is the revision that recorded the information and a `source` of None
                 marks a destination that was removed.
                 """
                 all_copies = {}
                 alwaysmatch = match.always()
                 for r in revs:
                     copies = all_copies.pop(r, None)
                     if copies is None:
                         # this is a root
                         copies = {}
                     for c in children[r]:
                         p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
                         if r == p1:
                             parent = 1
                             childcopies = p1copies
                         else:
                             assert r == p2
                             parent = 2
                             childcopies = p2copies
                         if not alwaysmatch:
                             childcopies = {
                                 dst: src for dst, src in childcopies.items() if match(dst)
                             }
                         # `copies` is shared by every child of `r`: only ever write to
                         # a private copy of it.
                         newcopies = copies
                         if childcopies:
                             newcopies = copies.copy()
                             for dest, source in pycompat.iteritems(childcopies):
                                 # chain with what we already know about the source
                                 prev = copies.get(source)
                                 if prev is not None and prev[1] is not None:
                                     source = prev[1]
                                 newcopies[dest] = (c, source)
                             assert newcopies is not copies
                         for f in removed:
                             if f in newcopies:
                                 if newcopies is copies:
                                     # copy on write to avoid affecting potential other
                                     # branches.  when there are no other branches, this
                                     # could be avoided.
                                     newcopies = copies.copy()
                                 newcopies[f] = (c, None)
                         othercopies = all_copies.get(c)
                         if othercopies is None:
                             all_copies[c] = newcopies
                         else:
                             # we are the second parent to work on c, we need to merge our
                             # work with the other.
                             #
                             # In case of conflict, parent 1 take precedence over parent 2.
                             # This is an arbitrary choice made anew when implementing
                             # changeset based copies. It was made without regards with
                             # potential filelog related behavior.
                             #
                             # The merge writes into `minor`. Both candidates may be a
                             # dictionary still shared with another branch (the parent's
                             # own `copies`), so merge into a private copy to keep the
                             # result from leaking into unrelated children.
                             if parent == 1:
                                 # we cannot tell whether the stored dict is shared
                                 minor = othercopies.copy()
                                 major = newcopies
                             else:
                                 minor = newcopies
                                 if minor is copies:
                                     minor = copies.copy()
                                 major = othercopies
                             _merge_copies_dict_extra(minor, major, isancestor, ismerged)
                             all_copies[c] = minor
                 # drop the bookkeeping: keep only destinations that still have a source
                 final_copies = {}
                 for dest, (tt, source) in all_copies[targetrev].items():
                     if source is not None:
                         final_copies[dest] = source
                 return final_copies
             def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
                 """version of `_merge_copies_dict` that works with the Google
                 specific "extra" based storage for copy information"""
                 for dest, value in major.items():
                     other = minor.get(dest)
                     if other is None:
                         minor[dest] = value
                     else:
                         new_tt = value[0]
                         other_tt = other[0]
                         if value[1] == other[1]:
                             continue
                         # content from "major" wins, unless it is older
                         # than the branch point or there is a merge
                         if (
                             new_tt == other_tt
                             or not isancestor(new_tt, other_tt)
                             or ismerged(dest)
                         ):
                             minor[dest] = value
             def _forwardcopies(a, b, base=None, match=None):
                 """find {dst@b: src@a} copy mapping where a is an ancestor of b
                 `base` defaults to `a`. `match` is combined with the narrow matcher of
                 a's repository before being used.
                 """
                 if base is None:
                     base = a
                 match = a.repo().narrowmatch(match)
                 if b.rev() is not None:
                     # plain committed revision
                     return _committedforwardcopies(a, b, base, match)
                 # b is the working copy: trace up to its first parent, then chain
                 # the copies recorded in the dirstate on top of that
                 committed = _committedforwardcopies(a, b.p1(), base, match)
                 return _chain(committed, _dirstatecopies(b._repo, match))
             def _backwardrenames(a, b, match):
                 """Return the renames found going backward from `a` to `b`.
                 The forward copies from `b` to `a` are reversed, so the result maps
                 each source to one of its destinations. Returns an empty mapping when
                 `experimental.copytrace` is `off`.
                 """
                 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
                     return {}
                 # "match" is deliberately not forwarded: it would filter on the
                 # destination, while the reversed mapping must be filtered on the
                 # source.
                 forward = _forwardcopies(b, a)
                 renames = {}
                 # 1:n situations can exist even though copies are ignored (e.g.
                 # hg cp a b; hg mv a c); iterating in sorted order makes the
                 # arbitrary pick (the last one) deterministic.
                 for dst, src in sorted(pycompat.iteritems(forward)):
                     wanted = not match or match(src)
                     # a source still present in `a` was copied, not renamed
                     if wanted and src not in a:
                         renames[src] = dst
                 return renames
             def pathcopies(x, y, match=None):
                 """find {dst@y: src@x} copy mapping for directed compare
                 Depending on how `x` and `y` relate, the search goes forward, backward,
                 or backward to their common ancestor then forward. The result is
                 passed through `_filter()` before being returned.
                 """
                 repo = x._repo
                 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
                 def trace(msg):
                     if debug:
                         repo.ui.debug(msg)
                 if debug:
                     repo.ui.debug(
                         b'debug.copies: searching copies from %s to %s\n' % (x, y)
                     )
                 if x == y or not x or not y:
                     return {}
                 if y.rev() is None and x == y.p1():
                     trace(b'debug.copies: search mode: dirstate\n')
                     # short-circuit to avoid issues with merge states
                     return _dirstatecopies(repo, match)
                 ancestor = y.ancestor(x)
                 if ancestor == x:
                     trace(b'debug.copies: search mode: forward\n')
                     copies = _forwardcopies(x, y, match=match)
                 elif ancestor == y:
                     trace(b'debug.copies: search mode: backward\n')
                     copies = _backwardrenames(x, y, match=match)
                 else:
                     trace(b'debug.copies: search mode: combined\n')
                     base = x if ancestor.rev() != node.nullrev else None
                     backward = _backwardrenames(x, ancestor, match=match)
                     forward = _forwardcopies(ancestor, y, base, match=match)
                     copies = _chain(backward, forward)
                 _filter(x, y, copies)
                 return copies
             def mergecopies(repo, c1, c2, base):
                 """
                 Finds moves and copies between context c1 and c2 that are relevant for
                 merging. 'base' will be used as the merge base.

                 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
                 files that were moved/ copied in one merge parent and modified in another.
                 For example:

                 o          ---> 4 another commit
                 |
                 |   o      ---> 3 commit that modifies a.txt
                 |  /
                 o /        ---> 2 commit that moves a.txt to b.txt
                 |/
                 o          ---> 1 merge base

                 If we try to rebase revision 3 on revision 4, since there is no a.txt in
                 revision 4, and if user have copytrace disabled, we prints the following
                 message:

                 ```other changed <file> which local deleted```

                 Returns a 3-tuple made of two `branch_copies` instances followed by
                 "diverge", a mapping of source name -> list of destination names for
                 divergent renames.

                 This function calls different copytracing algorithms based on config.
                 """
                 def nothing():
                     return branch_copies(), branch_copies(), {}
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return nothing()
                 narrowmatch = c1.repo().narrowmatch()
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     dirstate_side = branch_copies(_dirstatecopies(repo, narrowmatch))
                     return dirstate_side, branch_copies(), {}
                 # Copy trace disabling is explicitly below the node == p1 logic above
                 # because the logic above is required for a simple copy to be kept
                 # across a rebase.
                 copytracing = repo.ui.config(b'experimental', b'copytrace')
                 # stringutil.parsebool() returns None when it is unable to parse the
                 # value, so copytracing stays enabled for anything but an explicit
                 # "false"-like value.
                 if stringutil.parsebool(copytracing) is False:
                     return nothing()
                 if usechangesetcentricalgo(repo):
                     # The heuristics don't make sense when we need changeset-centric algos
                     return _fullcopytracing(repo, c1, c2, base)
                 # Heuristics are only used when asked for, and even then full
                 # copytracing is preferred if only non-public revisions are involved:
                 # it will be fast enough and also covers copies the heuristics miss.
                 if copytracing == b'heuristics' and not _isfullcopytraceable(
                     repo, c1, base
                 ):
                     return _heuristicscopytracing(repo, c1, c2, base)
                 return _fullcopytracing(repo, c1, c2, base)
             def _isfullcopytraceable(repo, c1, base):
                 """ Checks that if base, source and destination are all no-public branches,
                 if yes let's use the full copytrace algorithm for increased capabilities
                 since it will be fast enough.
                 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
                 number of changesets from c1 to base such that if number of changesets are
                 more than the limit, full copytracing algorithm won't be used.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c1.mutable() and base.mutable():
                     sourcecommitlimit = repo.ui.configint(
                         b'experimental', b'copytrace.sourcecommitlimit'
                     )
                     commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
                     return commits < sourcecommitlimit
                 return False
             def _checksinglesidecopies(
                 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
             ):
                 if src not in m2:
                     # deleted on side 2
                     if src not in m1:
                         # renamed on side 1, deleted on side 2
                         renamedelete[src] = dsts1
                 elif src not in mb:
                     # Work around the "short-circuit to avoid issues with merge states"
                     # thing in pathcopies(): pathcopies(x, y) can return a copy where the
                     # destination doesn't exist in y.
                     pass
                 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
                     return
                 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
                     # modified on side 2
                     for dst in dsts1:
                         copy[dst] = src
             class branch_copies(object):
                 """Copy information gathered for one side of a merge/graft.
                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.
                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.
                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.
                 "dirmove" is a mapping of detected source dir -> destination dir renames.
                 This is needed for handling changes to new files previously grafted into
                 renamed directories.
                 Every attribute defaults to a fresh empty dict.
                 """
                 def __init__(
                     self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
                 ):
                     # None stands for "empty" so instances never share a default dict
                     if copy is None:
                         copy = {}
                     if renamedelete is None:
                         renamedelete = {}
                     if dirmove is None:
                         dirmove = {}
                     if movewithdir is None:
                         movewithdir = {}
                     self.copy = copy
                     self.renamedelete = renamedelete
                     self.dirmove = dirmove
                     self.movewithdir = movewithdir
                 def __repr__(self):
                     template = (
                         '<branch_copies\n'
                         '  copy=%r\n'
                         '  renamedelete=%r\n'
                         '  dirmove=%r\n'
                         '  movewithdir=%r\n'
                         '>'
                     )
                     fields = (
                         self.copy,
                         self.renamedelete,
                         self.dirmove,
                         self.movewithdir,
                     )
                     return template % fields
             def _fullcopytracing(repo, c1, c2, base):
                 """ The full copytracing algorithm which finds all the new files that were
                 added from merge base up to the top commit and for each file it checks if
                 this file was copied from another file.
                 This is pretty slow when a lot of changesets are involved but will track all
                 the copies.
                 Returns a 3-tuple `(branch_copies1, branch_copies2, diverge)`: one
                 `branch_copies` per side (c1 then c2) and a mapping of source name ->
                 sorted list of destination names for sources renamed to different
                 destinations on each side.
                 """
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 mb = base.manifest()
                 # {dst: src} mappings from the merge base to each side
                 copies1 = pathcopies(base, c1)
                 copies2 = pathcopies(base, c2)
                 if not (copies1 or copies2):
                     return branch_copies(), branch_copies(), {}
                 # reverse the mappings: source -> list of destinations on that side
                 inversecopies1 = {}
                 inversecopies2 = {}
                 for dst, src in copies1.items():
                     inversecopies1.setdefault(src, []).append(dst)
                 for dst, src in copies2.items():
                     inversecopies2.setdefault(src, []).append(dst)
                 copy1 = {}
                 copy2 = {}
                 diverge = {}
                 renamedelete1 = {}
                 renamedelete2 = {}
                 allsources = set(inversecopies1) | set(inversecopies2)
                 for src in allsources:
                     dsts1 = inversecopies1.get(src)
                     dsts2 = inversecopies2.get(src)
                     if dsts1 and dsts2:
                         # copied/renamed on both sides
                         if src not in m1 and src not in m2:
                             # renamed on both sides
                             dsts1 = set(dsts1)
                             dsts2 = set(dsts2)
                             # If there's some overlap in the rename destinations, we
                             # consider it not divergent. For example, if side 1 copies 'a'
                             # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                             # and 'd' and deletes 'a'.
                             if dsts1 & dsts2:
                                 for dst in dsts1 & dsts2:
                                     copy1[dst] = src
                                     copy2[dst] = src
                             else:
                                 diverge[src] = sorted(dsts1 | dsts2)
                         elif src in m1 and src in m2:
                             # copied on both sides
                             dsts1 = set(dsts1)
                             dsts2 = set(dsts2)
                             for dst in dsts1 & dsts2:
                                 copy1[dst] = src
                                 copy2[dst] = src
                         # TODO: Handle cases where it was renamed on one side and copied
                         # on the other side
                     elif dsts1:
                         # copied/renamed only on side 1
                         _checksinglesidecopies(
                             src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
                         )
                     elif dsts2:
                         # copied/renamed only on side 2
                         _checksinglesidecopies(
                             src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
                         )
                 # find interesting file sets from manifests
                 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
                 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
                 # files added (relative to the base) on one side only
                 u1 = sorted(addedinm1 - addedinm2)
                 u2 = sorted(addedinm2 - addedinm1)
                 header = b"  unmatched files in %s"
                 if u1:
                     repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
                 if u2:
                     repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))
                 if repo.ui.debugflag:
                     # the sets below only serve to annotate the debug output
                     renamedeleteset = set()
                     divergeset = set()
                     for dsts in diverge.values():
                         divergeset.update(dsts)
                     for dsts in renamedelete1.values():
                         renamedeleteset.update(dsts)
                     for dsts in renamedelete2.values():
                         renamedeleteset.update(dsts)
                     repo.ui.debug(
                         b"  all copies found (* = to merge, ! = divergent, "
                         b"% = renamed and deleted):\n"
                     )
                     for side, copies in ((b"local", copies1), (b"remote", copies2)):
                         if not copies:
                             continue
                         repo.ui.debug(b"   on %s side:\n" % side)
                         for f in sorted(copies):
                             note = b""
                             if f in copy1 or f in copy2:
                                 note += b"*"
                             if f in divergeset:
                                 note += b"!"
                             if f in renamedeleteset:
                                 note += b"%"
                             repo.ui.debug(
                                 b"    src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                             )
                     del renamedeleteset
                     del divergeset
                 repo.ui.debug(b"  checking for directory renames\n")
                 # directory moves detected on one side are matched against the files
                 # added on the *other* side, hence the crossed indices below
                 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
                 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)
                 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
                 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
                 return branch_copies1, branch_copies2, diverge
             def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
                 """Detect directory renames and the added files that must follow them.
                 ctx: the context for one of the sides
                 copy: files copied on the same side (as ctx)
                 fullcopy: files copied on the same side (as ctx), including those that
                           merge.manifestmerge() won't care about
                 addedfiles: added files on the other side (compared to ctx)
                 Returns a `(dirmove, movewithdir)` pair of dicts: `dirmove` maps a
                 source directory to its destination directory (both with a trailing
                 slash) and `movewithdir` maps an added file to its relocated name.
                 """
                 known_dirs = ctx.dirs()
                 rejected = set()
                 candidates = {}
                 # A directory move is when all the files of a directory end up in
                 # one and the same new directory; look at each file copy in turn.
                 for dst, src in pycompat.iteritems(fullcopy):
                     srcdir = pathutil.dirname(src)
                     dstdir = pathutil.dirname(dst)
                     if srcdir in rejected:
                         # already seen to be uninteresting
                         continue
                     if (srcdir in known_dirs and dstdir in known_dirs) or (
                         candidates.get(srcdir, dstdir) != dstdir
                     ):
                         # either the directory wasn't entirely moved locally, or its
                         # files went to two different places
                         rejected.add(srcdir)
                     else:
                         # looks good so far
                         candidates[srcdir] = dstdir
                 for srcdir in rejected:
                     candidates.pop(srcdir, None)
                 del known_dirs, rejected
                 if not candidates:
                     return {}, {}
                 # switch to slash-terminated prefixes for the matching below
                 dirmove = {}
                 for srcdir, dstdir in pycompat.iteritems(candidates):
                     dirmove[srcdir + b"/"] = dstdir + b"/"
                 for srcprefix in dirmove:
                     repo.ui.debug(
                         b"   discovered dir src: '%s' -> dst: '%s'\n"
                         % (srcprefix, dirmove[srcprefix])
                     )
                 movewithdir = {}
                 # check unaccounted nonoverlapping files against directory moves
                 for added in addedfiles:
                     if added in fullcopy:
                         continue
                     for srcprefix in dirmove:
                         if not added.startswith(srcprefix):
                             continue
                         # new file added in a directory that was moved, move it
                         relocated = dirmove[srcprefix] + added[len(srcprefix) :]
                         if relocated not in copy:
                             movewithdir[added] = relocated
                             repo.ui.debug(
                                 b"   pending file src: '%s' -> dst: '%s'\n"
                                 % (added, relocated)
                             )
                         # only the first matching prefix is considered
                         break
                 return dirmove, movewithdir
             def _heuristicscopytracing(repo, c1, c2, base):
                 """ Fast copytracing using filename heuristics

                 Assumes that moves or renames are of following two types:

                 1) Inside a directory only (same directory name but different filenames)
                 2) Move from one directory to another
                    (same filenames but different directory names)

                 Works only when there are no merge commits in the "source branch".
                 Source branch is commits from base up to c2 not including base.

                 If merge is involved it fallbacks to _fullcopytracing().

                 Can be used by setting the following config:

                     [experimental]
                     copytrace = heuristics

                 In some cases the copy/move candidates found by heuristics can be very large
                 in number and that will make the algorithm slow. The number of possible
                 candidates to check can be limited by using the config
                 `experimental.copytrace.movecandidateslimit` which defaults to 100.

                 Returns `(branch_copies(copies1), branch_copies(copies2), {})`, or
                 whatever `_fullcopytracing()` returns when falling back to it.
                 """
                 # a working copy context is replaced by its first parent
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c2.rev() is None:
                     c2 = c2.p1()
                 changedfiles = set()
                 m1 = c1.manifest()
                 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
                     # If base is not in c2 branch, we switch to fullcopytracing
                     repo.ui.debug(
                         b"switching to full copytracing as base is not "
                         b"an ancestor of c2\n"
                     )
                     return _fullcopytracing(repo, c1, c2, base)
                 # collect every file touched between base (excluded) and c2
                 ctx = c2
                 while ctx != base:
                     if len(ctx.parents()) == 2:
                         # To keep things simple let's not handle merges
                         repo.ui.debug(b"switching to full copytracing because of merges\n")
                         return _fullcopytracing(repo, c1, c2, base)
                     changedfiles.update(ctx.files())
                     ctx = ctx.p1()
                 # copies made on the c2 side whose source still exists in c1
                 copies2 = {}
                 cp = _forwardcopies(base, c2)
                 for dst, src in pycompat.iteritems(cp):
                     if src in m1:
                         copies2[dst] = src
                 # file is missing if it isn't present in the destination, but is present in
                 # the base and present in the source.
                 # Presence in the base is important to exclude added files, presence in the
                 # source is important to exclude removed files.
                 missingfiles = [
                     f for f in changedfiles if f not in m1 and f in base and f in c2
                 ]
                 copies1 = {}
                 if missingfiles:
                     # we can have a lot of candidates which can slow down the heuristics
                     # config value to limit the number of candidates moves to check
                     # (read once: it does not depend on the file being examined)
                     maxcandidates = repo.ui.configint(
                         b'experimental', b'copytrace.movecandidateslimit'
                     )
                     # index the files added on the c1 side by basename and by dirname
                     basenametofilename = collections.defaultdict(list)
                     dirnametofilename = collections.defaultdict(list)
                     for f in m1.filesnotin(base.manifest()):
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         basenametofilename[basename].append(f)
                         dirnametofilename[dirname].append(f)
                     for f in missingfiles:
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         samebasename = basenametofilename[basename]
                         samedirname = dirnametofilename[dirname]
                         movecandidates = samebasename + samedirname
                         # f is guaranteed to be present in c2, that's why
                         # c2.filectx(f) won't fail
                         f2 = c2.filectx(f)
                         if len(movecandidates) > maxcandidates:
                             repo.ui.status(
                                 _(
                                     b"skipping copytracing for '%s', more "
                                     b"candidates than the limit: %d\n"
                                 )
                                 % (f, len(movecandidates))
                             )
                             continue
                         for candidate in movecandidates:
                             f1 = c1.filectx(candidate)
                             if _related(f1, f2):
                                 # if there are a few related copies then we'll merge
                                 # changes into all of them. This matches the behaviour
                                 # of upstream copytracing
                                 copies1[candidate] = f
                 return branch_copies(copies1), branch_copies(copies2), {}
             def _related(f1, f2):
                 """return True if f1 and f2 filectx have a common ancestor
                 Walk back to common ancestor to see if the two files originate
                 from the same file. Since workingfilectx's rev() is None it messes
                 up the integer comparison logic, hence the pre-step check for
                 None (f1 and f2 can only be workingfilectx's initially).
                 """
                 if f1 == f2:
                     return True  # a match
                 g1, g2 = f1.ancestors(), f2.ancestors()
                 try:
                     f1r, f2r = f1.linkrev(), f2.linkrev()
                     if f1r is None:
                         f1 = next(g1)
                     if f2r is None:
                         f2 = next(g2)
                     while True:
                         f1r, f2r = f1.linkrev(), f2.linkrev()
                         if f1r > f2r:
                             f1 = next(g1)
                         elif f2r > f1r:
                             f2 = next(g2)
                         else:  # f1 and f2 point to files in the same linkrev
                             return f1 == f2  # true if they point to the same file
                 except StopIteration:
                     return False
             def graftcopies(wctx, ctx, base):
                 """reproduce copies between base and ctx in the wctx
                 Unlike mergecopies(), this function will only consider copies between base
                 and ctx; it will ignore copies between base and wctx. Also unlike
                 mergecopies(), this function will apply copies to the working copy (instead
                 of just returning information about the copies). That makes it cheaper
                 (especially in the common case of base==ctx.p1()) and useful also when
                 experimental.copytrace=off.
                 merge.update() will have already marked most copies, but it will only
                 mark copies if it thinks the source files are related (see
                 merge._related()). It will also not mark copies if the file wasn't modified
                 on the local side. This function adds the copies that were "missed"
                 by merge.update().
                 """
                 grafted = pathcopies(base, ctx)
                 # prune the mapping against the working copy and its first parent
                 wparent = wctx.p1()
                 _filter(wparent, wctx, grafted)
                 for destination, source in pycompat.iteritems(grafted):
                     wctx[destination].markcopied(source)