##// END OF EJS Templates
copies: add docs for config `experimental.copytrace.sourcecommitlimit`...
Pulkit Goyal -
r34517:e79b3611 default
parent child Browse files
Show More
@@ -1,863 +1,867 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from . import (
14 from . import (
15 match as matchmod,
15 match as matchmod,
16 node,
16 node,
17 pathutil,
17 pathutil,
18 scmutil,
18 scmutil,
19 util,
19 util,
20 )
20 )
21
21
22 def _findlimit(repo, a, b):
22 def _findlimit(repo, a, b):
23 """
23 """
24 Find the last revision that needs to be checked to ensure that a full
24 Find the last revision that needs to be checked to ensure that a full
25 transitive closure for file copies can be properly calculated.
25 transitive closure for file copies can be properly calculated.
26 Generally, this means finding the earliest revision number that's an
26 Generally, this means finding the earliest revision number that's an
27 ancestor of a or b but not both, except when a or b is a direct descendent
27 ancestor of a or b but not both, except when a or b is a direct descendent
28 of the other, in which case we can return the minimum revnum of a and b.
28 of the other, in which case we can return the minimum revnum of a and b.
29 None if no such revision exists.
29 None if no such revision exists.
30 """
30 """
31
31
32 # basic idea:
32 # basic idea:
33 # - mark a and b with different sides
33 # - mark a and b with different sides
34 # - if a parent's children are all on the same side, the parent is
34 # - if a parent's children are all on the same side, the parent is
35 # on that side, otherwise it is on no side
35 # on that side, otherwise it is on no side
36 # - walk the graph in topological order with the help of a heap;
36 # - walk the graph in topological order with the help of a heap;
37 # - add unseen parents to side map
37 # - add unseen parents to side map
38 # - clear side of any parent that has children on different sides
38 # - clear side of any parent that has children on different sides
39 # - track number of interesting revs that might still be on a side
39 # - track number of interesting revs that might still be on a side
40 # - track the lowest interesting rev seen
40 # - track the lowest interesting rev seen
41 # - quit when interesting revs is zero
41 # - quit when interesting revs is zero
42
42
43 cl = repo.changelog
43 cl = repo.changelog
44 working = len(cl) # pseudo rev for the working directory
44 working = len(cl) # pseudo rev for the working directory
45 if a is None:
45 if a is None:
46 a = working
46 a = working
47 if b is None:
47 if b is None:
48 b = working
48 b = working
49
49
50 side = {a: -1, b: 1}
50 side = {a: -1, b: 1}
51 visit = [-a, -b]
51 visit = [-a, -b]
52 heapq.heapify(visit)
52 heapq.heapify(visit)
53 interesting = len(visit)
53 interesting = len(visit)
54 hascommonancestor = False
54 hascommonancestor = False
55 limit = working
55 limit = working
56
56
57 while interesting:
57 while interesting:
58 r = -heapq.heappop(visit)
58 r = -heapq.heappop(visit)
59 if r == working:
59 if r == working:
60 parents = [cl.rev(p) for p in repo.dirstate.parents()]
60 parents = [cl.rev(p) for p in repo.dirstate.parents()]
61 else:
61 else:
62 parents = cl.parentrevs(r)
62 parents = cl.parentrevs(r)
63 for p in parents:
63 for p in parents:
64 if p < 0:
64 if p < 0:
65 continue
65 continue
66 if p not in side:
66 if p not in side:
67 # first time we see p; add it to visit
67 # first time we see p; add it to visit
68 side[p] = side[r]
68 side[p] = side[r]
69 if side[p]:
69 if side[p]:
70 interesting += 1
70 interesting += 1
71 heapq.heappush(visit, -p)
71 heapq.heappush(visit, -p)
72 elif side[p] and side[p] != side[r]:
72 elif side[p] and side[p] != side[r]:
73 # p was interesting but now we know better
73 # p was interesting but now we know better
74 side[p] = 0
74 side[p] = 0
75 interesting -= 1
75 interesting -= 1
76 hascommonancestor = True
76 hascommonancestor = True
77 if side[r]:
77 if side[r]:
78 limit = r # lowest rev visited
78 limit = r # lowest rev visited
79 interesting -= 1
79 interesting -= 1
80
80
81 if not hascommonancestor:
81 if not hascommonancestor:
82 return None
82 return None
83
83
84 # Consider the following flow (see test-commit-amend.t under issue4405):
84 # Consider the following flow (see test-commit-amend.t under issue4405):
85 # 1/ File 'a0' committed
85 # 1/ File 'a0' committed
86 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
86 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
87 # 3/ Move back to first commit
87 # 3/ Move back to first commit
88 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
88 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
89 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
89 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
90 #
90 #
91 # During the amend in step five, we will be in this state:
91 # During the amend in step five, we will be in this state:
92 #
92 #
93 # @ 3 temporary amend commit for a1-amend
93 # @ 3 temporary amend commit for a1-amend
94 # |
94 # |
95 # o 2 a1-amend
95 # o 2 a1-amend
96 # |
96 # |
97 # | o 1 a1
97 # | o 1 a1
98 # |/
98 # |/
99 # o 0 a0
99 # o 0 a0
100 #
100 #
101 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
101 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
102 # yet the filelog has the copy information in rev 1 and we will not look
102 # yet the filelog has the copy information in rev 1 and we will not look
103 # back far enough unless we also look at the a and b as candidates.
103 # back far enough unless we also look at the a and b as candidates.
104 # This only occurs when a is a descendent of b or visa-versa.
104 # This only occurs when a is a descendent of b or visa-versa.
105 return min(limit, a, b)
105 return min(limit, a, b)
106
106
107 def _chain(src, dst, a, b):
107 def _chain(src, dst, a, b):
108 '''chain two sets of copies a->b'''
108 '''chain two sets of copies a->b'''
109 t = a.copy()
109 t = a.copy()
110 for k, v in b.iteritems():
110 for k, v in b.iteritems():
111 if v in t:
111 if v in t:
112 # found a chain
112 # found a chain
113 if t[v] != k:
113 if t[v] != k:
114 # file wasn't renamed back to itself
114 # file wasn't renamed back to itself
115 t[k] = t[v]
115 t[k] = t[v]
116 if v not in dst:
116 if v not in dst:
117 # chain was a rename, not a copy
117 # chain was a rename, not a copy
118 del t[v]
118 del t[v]
119 if v in src:
119 if v in src:
120 # file is a copy of an existing file
120 # file is a copy of an existing file
121 t[k] = v
121 t[k] = v
122
122
123 # remove criss-crossed copies
123 # remove criss-crossed copies
124 for k, v in t.items():
124 for k, v in t.items():
125 if k in src and v in dst:
125 if k in src and v in dst:
126 del t[k]
126 del t[k]
127
127
128 return t
128 return t
129
129
130 def _tracefile(fctx, am, limit=-1):
130 def _tracefile(fctx, am, limit=-1):
131 '''return file context that is the ancestor of fctx present in ancestor
131 '''return file context that is the ancestor of fctx present in ancestor
132 manifest am, stopping after the first ancestor lower than limit'''
132 manifest am, stopping after the first ancestor lower than limit'''
133
133
134 for f in fctx.ancestors():
134 for f in fctx.ancestors():
135 if am.get(f.path(), None) == f.filenode():
135 if am.get(f.path(), None) == f.filenode():
136 return f
136 return f
137 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
137 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
138 return None
138 return None
139
139
140 def _dirstatecopies(d):
140 def _dirstatecopies(d):
141 ds = d._repo.dirstate
141 ds = d._repo.dirstate
142 c = ds.copies().copy()
142 c = ds.copies().copy()
143 for k in list(c):
143 for k in list(c):
144 if ds[k] not in 'anm':
144 if ds[k] not in 'anm':
145 del c[k]
145 del c[k]
146 return c
146 return c
147
147
148 def _computeforwardmissing(a, b, match=None):
148 def _computeforwardmissing(a, b, match=None):
149 """Computes which files are in b but not a.
149 """Computes which files are in b but not a.
150 This is its own function so extensions can easily wrap this call to see what
150 This is its own function so extensions can easily wrap this call to see what
151 files _forwardcopies is about to process.
151 files _forwardcopies is about to process.
152 """
152 """
153 ma = a.manifest()
153 ma = a.manifest()
154 mb = b.manifest()
154 mb = b.manifest()
155 return mb.filesnotin(ma, match=match)
155 return mb.filesnotin(ma, match=match)
156
156
def _forwardcopies(a, b, match=None):
    '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

    # If b is the working directory, compare against its first parent and
    # fold the dirstate copies back in at the end.
    wctx = None
    if b.rev() is None:
        wctx = b
        b = wctx.p1()
        if a == b:
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(wctx)

    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        limit = -1
    am = a.manifest()

    # We only look for files that are new on the b side; finding where old
    # files went is too expensive, so a case like 'hg rm b; hg cp a b' can
    # be missed.
    cm = {}

    # Comparing entire manifests is expensive.  In the common case of
    # comparing a non-merge commit against its parent (e.g. during rebase
    # or histedit) the comparison can be restricted to b.files().  Merge
    # commits are excluded from this optimization because their
    # ctx.files() is not correct for this comparison.
    fmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        fmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=fmatch)

    # share one ancestry context across all traced files to keep linkrev
    # adjustment cheap (cf. _makegetfctx)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in missing:
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            cm[f] = ofctx.path()

    # chain the dirstate copies on top when b was the working directory
    if wctx is not None:
        cm = _chain(a, wctx, cm, _dirstatecopies(wctx))

    return cm
206
206
def _backwardrenames(a, b):
    """Return the {src@b: dst@a} rename mapping going backwards from a to b.

    Computed by inverting the forward copies from b to a; pure copies are
    dropped (only renames survive the inversion).  Returns {} when
    copytracing is disabled via experimental.copytrace=off.
    """
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames; sorting makes the pick
    # deterministic.
    f = _forwardcopies(b, a)
    r = {}
    # items() instead of py2-only iteritems() keeps this Python 3 ready
    for k, v in sorted(f.items()):
        # remove copies (source still present in a)
        if v in a:
            continue
        r[v] = k
    return r
222
222
def pathcopies(x, y, match=None):
    '''find {dst@y: src@x} copy mapping for directed compare'''
    if x == y or not x or not y:
        return {}
    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: trace forwards only
        return _forwardcopies(x, y, match=match)
    if anc == y:
        # y is an ancestor of x: trace backwards only
        return _backwardrenames(x, y)
    # otherwise walk back to the common ancestor, then forward again
    return _chain(x, y, _backwardrenames(x, anc),
                  _forwardcopies(anc, y, match=match))
234
234
235 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
235 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
236 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
236 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
237 and c2. This is its own function so extensions can easily wrap this call
237 and c2. This is its own function so extensions can easily wrap this call
238 to see what files mergecopies is about to process.
238 to see what files mergecopies is about to process.
239
239
240 Even though c1 and c2 are not used in this function, they are useful in
240 Even though c1 and c2 are not used in this function, they are useful in
241 other extensions for being able to read the file nodes of the changed files.
241 other extensions for being able to read the file nodes of the changed files.
242
242
243 "baselabel" can be passed to help distinguish the multiple computations
243 "baselabel" can be passed to help distinguish the multiple computations
244 done in the graft case.
244 done in the graft case.
245 """
245 """
246 u1 = sorted(addedinm1 - addedinm2)
246 u1 = sorted(addedinm1 - addedinm2)
247 u2 = sorted(addedinm2 - addedinm1)
247 u2 = sorted(addedinm2 - addedinm1)
248
248
249 header = " unmatched files in %s"
249 header = " unmatched files in %s"
250 if baselabel:
250 if baselabel:
251 header += ' (from %s)' % baselabel
251 header += ' (from %s)' % baselabel
252 if u1:
252 if u1:
253 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
253 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
254 if u2:
254 if u2:
255 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
255 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
256 return u1, u2
256 return u1, u2
257
257
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    The 'filectx'-building function has to be re-created for each
    '_checkcopies' call so the linkrev adjustment is set up properly for
    each one; that adjustment matters for correct rename detection.
    Sharing a single '_ancestrycontext' keeps the cost of the adjustment
    bounded — without it, every file could trigger a full DAG traversal
    and the operation's complexity would explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        # working context has no rev: anchor ancestry on its parents
        revs = [p.rev() for p in ctx.parents()] if rev is None else [rev]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac

    def makectx(f, n):
        if n in node.wdirnodes:  # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not created from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx

    return util.lrucachefunc(makectx)
292
292
293 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
293 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
294 """combine partial copy paths"""
294 """combine partial copy paths"""
295 remainder = {}
295 remainder = {}
296 for f in copyfrom:
296 for f in copyfrom:
297 if f in copyto:
297 if f in copyto:
298 finalcopy[copyto[f]] = copyfrom[f]
298 finalcopy[copyto[f]] = copyfrom[f]
299 del copyto[f]
299 del copyto[f]
300 for f in incompletediverge:
300 for f in incompletediverge:
301 assert f not in diverge
301 assert f not in diverge
302 ic = incompletediverge[f]
302 ic = incompletediverge[f]
303 if ic[0] in copyto:
303 if ic[0] in copyto:
304 diverge[f] = [copyto[ic[0]], ic[1]]
304 diverge[f] = [copyto[ic[0]], ic[1]]
305 else:
305 else:
306 remainder[f] = ic
306 remainder[f] = ic
307 return remainder
307 return remainder
308
308
def mergecopies(repo, c1, c2, base):
    """
    Dispatch, based on the 'experimental.copytrace' config, to a
    copytracing algorithm that finds moves and copies between context c1
    and c2 that are relevant for merging. 'base' will be used as the
    merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'off':
        return {}, {}, {}, {}, {}

    if copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)

    return _fullcopytracing(repo, c1, c2, base)
378
378
379 def _isfullcopytraceable(repo, c1, base):
379 def _isfullcopytraceable(repo, c1, base):
380 """ Checks that if base, source and destination are all no-public branches,
380 """ Checks that if base, source and destination are all no-public branches,
381 if yes let's use the full copytrace algorithm for increased capabilities
381 if yes let's use the full copytrace algorithm for increased capabilities
382 since it will be fast enough.
382 since it will be fast enough.
383
384 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
385 number of changesets from c1 to base such that if number of changesets are
386 more than the limit, full copytracing algorithm won't be used.
383 """
387 """
384 if c1.rev() is None:
388 if c1.rev() is None:
385 c1 = c1.p1()
389 c1 = c1.p1()
386 if c1.mutable() and base.mutable():
390 if c1.mutable() and base.mutable():
387 sourcecommitlimit = repo.ui.configint('experimental',
391 sourcecommitlimit = repo.ui.configint('experimental',
388 'copytrace.sourcecommitlimit')
392 'copytrace.sourcecommitlimit')
389 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
393 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
390 return commits < sourcecommitlimit
394 return commits < sourcecommitlimit
391 return False
395 return False
392
396
393 def _fullcopytracing(repo, c1, c2, base):
397 def _fullcopytracing(repo, c1, c2, base):
394 """ The full copytracing algorithm which finds all the new files that were
398 """ The full copytracing algorithm which finds all the new files that were
395 added from merge base up to the top commit and for each file it checks if
399 added from merge base up to the top commit and for each file it checks if
396 this file was copied from another file.
400 this file was copied from another file.
397
401
398 This is pretty slow when a lot of changesets are involved but will track all
402 This is pretty slow when a lot of changesets are involved but will track all
399 the copies.
403 the copies.
400 """
404 """
401 # In certain scenarios (e.g. graft, update or rebase), base can be
405 # In certain scenarios (e.g. graft, update or rebase), base can be
402 # overridden We still need to know a real common ancestor in this case We
406 # overridden We still need to know a real common ancestor in this case We
403 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
407 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
404 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
408 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
405 # caller may not know if the revision passed in lieu of the CA is a genuine
409 # caller may not know if the revision passed in lieu of the CA is a genuine
406 # common ancestor or not without explicitly checking it, it's better to
410 # common ancestor or not without explicitly checking it, it's better to
407 # determine that here.
411 # determine that here.
408 #
412 #
409 # base.descendant(wc) and base.descendant(base) are False, work around that
413 # base.descendant(wc) and base.descendant(base) are False, work around that
410 _c1 = c1.p1() if c1.rev() is None else c1
414 _c1 = c1.p1() if c1.rev() is None else c1
411 _c2 = c2.p1() if c2.rev() is None else c2
415 _c2 = c2.p1() if c2.rev() is None else c2
412 # an endpoint is "dirty" if it isn't a descendant of the merge base
416 # an endpoint is "dirty" if it isn't a descendant of the merge base
413 # if we have a dirty endpoint, we need to trigger graft logic, and also
417 # if we have a dirty endpoint, we need to trigger graft logic, and also
414 # keep track of which endpoint is dirty
418 # keep track of which endpoint is dirty
415 dirtyc1 = not (base == _c1 or base.descendant(_c1))
419 dirtyc1 = not (base == _c1 or base.descendant(_c1))
416 dirtyc2 = not (base == _c2 or base.descendant(_c2))
420 dirtyc2 = not (base == _c2 or base.descendant(_c2))
417 graft = dirtyc1 or dirtyc2
421 graft = dirtyc1 or dirtyc2
418 tca = base
422 tca = base
419 if graft:
423 if graft:
420 tca = _c1.ancestor(_c2)
424 tca = _c1.ancestor(_c2)
421
425
422 limit = _findlimit(repo, c1.rev(), c2.rev())
426 limit = _findlimit(repo, c1.rev(), c2.rev())
423 if limit is None:
427 if limit is None:
424 # no common ancestor, no copies
428 # no common ancestor, no copies
425 return {}, {}, {}, {}, {}
429 return {}, {}, {}, {}, {}
426 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
430 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
427
431
428 m1 = c1.manifest()
432 m1 = c1.manifest()
429 m2 = c2.manifest()
433 m2 = c2.manifest()
430 mb = base.manifest()
434 mb = base.manifest()
431
435
432 # gather data from _checkcopies:
436 # gather data from _checkcopies:
433 # - diverge = record all diverges in this dict
437 # - diverge = record all diverges in this dict
434 # - copy = record all non-divergent copies in this dict
438 # - copy = record all non-divergent copies in this dict
435 # - fullcopy = record all copies in this dict
439 # - fullcopy = record all copies in this dict
436 # - incomplete = record non-divergent partial copies here
440 # - incomplete = record non-divergent partial copies here
437 # - incompletediverge = record divergent partial copies here
441 # - incompletediverge = record divergent partial copies here
438 diverge = {} # divergence data is shared
442 diverge = {} # divergence data is shared
439 incompletediverge = {}
443 incompletediverge = {}
440 data1 = {'copy': {},
444 data1 = {'copy': {},
441 'fullcopy': {},
445 'fullcopy': {},
442 'incomplete': {},
446 'incomplete': {},
443 'diverge': diverge,
447 'diverge': diverge,
444 'incompletediverge': incompletediverge,
448 'incompletediverge': incompletediverge,
445 }
449 }
446 data2 = {'copy': {},
450 data2 = {'copy': {},
447 'fullcopy': {},
451 'fullcopy': {},
448 'incomplete': {},
452 'incomplete': {},
449 'diverge': diverge,
453 'diverge': diverge,
450 'incompletediverge': incompletediverge,
454 'incompletediverge': incompletediverge,
451 }
455 }
452
456
453 # find interesting file sets from manifests
457 # find interesting file sets from manifests
454 addedinm1 = m1.filesnotin(mb)
458 addedinm1 = m1.filesnotin(mb)
455 addedinm2 = m2.filesnotin(mb)
459 addedinm2 = m2.filesnotin(mb)
456 bothnew = sorted(addedinm1 & addedinm2)
460 bothnew = sorted(addedinm1 & addedinm2)
457 if tca == base:
461 if tca == base:
458 # unmatched file from base
462 # unmatched file from base
459 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
463 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
460 u1u, u2u = u1r, u2r
464 u1u, u2u = u1r, u2r
461 else:
465 else:
462 # unmatched file from base (DAG rotation in the graft case)
466 # unmatched file from base (DAG rotation in the graft case)
463 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
467 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
464 baselabel='base')
468 baselabel='base')
465 # unmatched file from topological common ancestors (no DAG rotation)
469 # unmatched file from topological common ancestors (no DAG rotation)
466 # need to recompute this for directory move handling when grafting
470 # need to recompute this for directory move handling when grafting
467 mta = tca.manifest()
471 mta = tca.manifest()
468 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
472 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
469 m2.filesnotin(mta),
473 m2.filesnotin(mta),
470 baselabel='topological common ancestor')
474 baselabel='topological common ancestor')
471
475
472 for f in u1u:
476 for f in u1u:
473 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
477 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
474
478
475 for f in u2u:
479 for f in u2u:
476 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
480 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
477
481
478 copy = dict(data1['copy'])
482 copy = dict(data1['copy'])
479 copy.update(data2['copy'])
483 copy.update(data2['copy'])
480 fullcopy = dict(data1['fullcopy'])
484 fullcopy = dict(data1['fullcopy'])
481 fullcopy.update(data2['fullcopy'])
485 fullcopy.update(data2['fullcopy'])
482
486
483 if dirtyc1:
487 if dirtyc1:
484 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
488 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
485 incompletediverge)
489 incompletediverge)
486 else:
490 else:
487 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
491 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
488 incompletediverge)
492 incompletediverge)
489
493
490 renamedelete = {}
494 renamedelete = {}
491 renamedeleteset = set()
495 renamedeleteset = set()
492 divergeset = set()
496 divergeset = set()
493 for of, fl in list(diverge.items()):
497 for of, fl in list(diverge.items()):
494 if len(fl) == 1 or of in c1 or of in c2:
498 if len(fl) == 1 or of in c1 or of in c2:
495 del diverge[of] # not actually divergent, or not a rename
499 del diverge[of] # not actually divergent, or not a rename
496 if of not in c1 and of not in c2:
500 if of not in c1 and of not in c2:
497 # renamed on one side, deleted on the other side, but filter
501 # renamed on one side, deleted on the other side, but filter
498 # out files that have been renamed and then deleted
502 # out files that have been renamed and then deleted
499 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
503 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
500 renamedeleteset.update(fl) # reverse map for below
504 renamedeleteset.update(fl) # reverse map for below
501 else:
505 else:
502 divergeset.update(fl) # reverse map for below
506 divergeset.update(fl) # reverse map for below
503
507
504 if bothnew:
508 if bothnew:
505 repo.ui.debug(" unmatched files new in both:\n %s\n"
509 repo.ui.debug(" unmatched files new in both:\n %s\n"
506 % "\n ".join(bothnew))
510 % "\n ".join(bothnew))
507 bothdiverge = {}
511 bothdiverge = {}
508 bothincompletediverge = {}
512 bothincompletediverge = {}
509 remainder = {}
513 remainder = {}
510 both1 = {'copy': {},
514 both1 = {'copy': {},
511 'fullcopy': {},
515 'fullcopy': {},
512 'incomplete': {},
516 'incomplete': {},
513 'diverge': bothdiverge,
517 'diverge': bothdiverge,
514 'incompletediverge': bothincompletediverge
518 'incompletediverge': bothincompletediverge
515 }
519 }
516 both2 = {'copy': {},
520 both2 = {'copy': {},
517 'fullcopy': {},
521 'fullcopy': {},
518 'incomplete': {},
522 'incomplete': {},
519 'diverge': bothdiverge,
523 'diverge': bothdiverge,
520 'incompletediverge': bothincompletediverge
524 'incompletediverge': bothincompletediverge
521 }
525 }
522 for f in bothnew:
526 for f in bothnew:
523 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
527 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
524 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
528 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
525 if dirtyc1:
529 if dirtyc1:
526 # incomplete copies may only be found on the "dirty" side for bothnew
530 # incomplete copies may only be found on the "dirty" side for bothnew
527 assert not both2['incomplete']
531 assert not both2['incomplete']
528 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
532 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
529 bothincompletediverge)
533 bothincompletediverge)
530 elif dirtyc2:
534 elif dirtyc2:
531 assert not both1['incomplete']
535 assert not both1['incomplete']
532 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
536 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
533 bothincompletediverge)
537 bothincompletediverge)
534 else:
538 else:
535 # incomplete copies and divergences can't happen outside grafts
539 # incomplete copies and divergences can't happen outside grafts
536 assert not both1['incomplete']
540 assert not both1['incomplete']
537 assert not both2['incomplete']
541 assert not both2['incomplete']
538 assert not bothincompletediverge
542 assert not bothincompletediverge
539 for f in remainder:
543 for f in remainder:
540 assert f not in bothdiverge
544 assert f not in bothdiverge
541 ic = remainder[f]
545 ic = remainder[f]
542 if ic[0] in (m1 if dirtyc1 else m2):
546 if ic[0] in (m1 if dirtyc1 else m2):
543 # backed-out rename on one side, but watch out for deleted files
547 # backed-out rename on one side, but watch out for deleted files
544 bothdiverge[f] = ic
548 bothdiverge[f] = ic
545 for of, fl in bothdiverge.items():
549 for of, fl in bothdiverge.items():
546 if len(fl) == 2 and fl[0] == fl[1]:
550 if len(fl) == 2 and fl[0] == fl[1]:
547 copy[fl[0]] = of # not actually divergent, just matching renames
551 copy[fl[0]] = of # not actually divergent, just matching renames
548
552
549 if fullcopy and repo.ui.debugflag:
553 if fullcopy and repo.ui.debugflag:
550 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
554 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
551 "% = renamed and deleted):\n")
555 "% = renamed and deleted):\n")
552 for f in sorted(fullcopy):
556 for f in sorted(fullcopy):
553 note = ""
557 note = ""
554 if f in copy:
558 if f in copy:
555 note += "*"
559 note += "*"
556 if f in divergeset:
560 if f in divergeset:
557 note += "!"
561 note += "!"
558 if f in renamedeleteset:
562 if f in renamedeleteset:
559 note += "%"
563 note += "%"
560 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
564 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
561 note))
565 note))
562 del divergeset
566 del divergeset
563
567
564 if not fullcopy:
568 if not fullcopy:
565 return copy, {}, diverge, renamedelete, {}
569 return copy, {}, diverge, renamedelete, {}
566
570
567 repo.ui.debug(" checking for directory renames\n")
571 repo.ui.debug(" checking for directory renames\n")
568
572
569 # generate a directory move map
573 # generate a directory move map
570 d1, d2 = c1.dirs(), c2.dirs()
574 d1, d2 = c1.dirs(), c2.dirs()
571 # Hack for adding '', which is not otherwise added, to d1 and d2
575 # Hack for adding '', which is not otherwise added, to d1 and d2
572 d1.addpath('/')
576 d1.addpath('/')
573 d2.addpath('/')
577 d2.addpath('/')
574 invalid = set()
578 invalid = set()
575 dirmove = {}
579 dirmove = {}
576
580
577 # examine each file copy for a potential directory move, which is
581 # examine each file copy for a potential directory move, which is
578 # when all the files in a directory are moved to a new directory
582 # when all the files in a directory are moved to a new directory
579 for dst, src in fullcopy.iteritems():
583 for dst, src in fullcopy.iteritems():
580 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
584 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
581 if dsrc in invalid:
585 if dsrc in invalid:
582 # already seen to be uninteresting
586 # already seen to be uninteresting
583 continue
587 continue
584 elif dsrc in d1 and ddst in d1:
588 elif dsrc in d1 and ddst in d1:
585 # directory wasn't entirely moved locally
589 # directory wasn't entirely moved locally
586 invalid.add(dsrc + "/")
590 invalid.add(dsrc + "/")
587 elif dsrc in d2 and ddst in d2:
591 elif dsrc in d2 and ddst in d2:
588 # directory wasn't entirely moved remotely
592 # directory wasn't entirely moved remotely
589 invalid.add(dsrc + "/")
593 invalid.add(dsrc + "/")
590 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
594 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
591 # files from the same directory moved to two different places
595 # files from the same directory moved to two different places
592 invalid.add(dsrc + "/")
596 invalid.add(dsrc + "/")
593 else:
597 else:
594 # looks good so far
598 # looks good so far
595 dirmove[dsrc + "/"] = ddst + "/"
599 dirmove[dsrc + "/"] = ddst + "/"
596
600
597 for i in invalid:
601 for i in invalid:
598 if i in dirmove:
602 if i in dirmove:
599 del dirmove[i]
603 del dirmove[i]
600 del d1, d2, invalid
604 del d1, d2, invalid
601
605
602 if not dirmove:
606 if not dirmove:
603 return copy, {}, diverge, renamedelete, {}
607 return copy, {}, diverge, renamedelete, {}
604
608
605 for d in dirmove:
609 for d in dirmove:
606 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
610 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
607 (d, dirmove[d]))
611 (d, dirmove[d]))
608
612
609 movewithdir = {}
613 movewithdir = {}
610 # check unaccounted nonoverlapping files against directory moves
614 # check unaccounted nonoverlapping files against directory moves
611 for f in u1r + u2r:
615 for f in u1r + u2r:
612 if f not in fullcopy:
616 if f not in fullcopy:
613 for d in dirmove:
617 for d in dirmove:
614 if f.startswith(d):
618 if f.startswith(d):
615 # new file added in a directory that was moved, move it
619 # new file added in a directory that was moved, move it
616 df = dirmove[d] + f[len(d):]
620 df = dirmove[d] + f[len(d):]
617 if df not in copy:
621 if df not in copy:
618 movewithdir[f] = df
622 movewithdir[f] = df
619 repo.ui.debug((" pending file src: '%s' -> "
623 repo.ui.debug((" pending file src: '%s' -> "
620 "dst: '%s'\n") % (f, df))
624 "dst: '%s'\n") % (f, df))
621 break
625 break
622
626
623 return copy, movewithdir, diverge, renamedelete, dirmove
627 return copy, movewithdir, diverge, renamedelete, dirmove
624
628
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics
    """

    # working-directory contexts have rev() None; use their parent instead
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    # NOTE: use a list comprehension rather than filter() so that the
    # emptiness test below works on Python 3 too, where filter() returns a
    # lazy (and always-truthy) iterator.
    missingfiles = [f for f in changedfiles
                    if f not in m1 and f in base and f in c2]

    if missingfiles:
        # index the files added on the c1 side by basename and by directory;
        # these are the candidate targets of a rename of a missing file
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # in case of a rebase/graft, base may not be a common ancestor
        anc = c1.ancestor(c2)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2, anc.rev()):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
712
716
713 def _related(f1, f2, limit):
717 def _related(f1, f2, limit):
714 """return True if f1 and f2 filectx have a common ancestor
718 """return True if f1 and f2 filectx have a common ancestor
715
719
716 Walk back to common ancestor to see if the two files originate
720 Walk back to common ancestor to see if the two files originate
717 from the same file. Since workingfilectx's rev() is None it messes
721 from the same file. Since workingfilectx's rev() is None it messes
718 up the integer comparison logic, hence the pre-step check for
722 up the integer comparison logic, hence the pre-step check for
719 None (f1 and f2 can only be workingfilectx's initially).
723 None (f1 and f2 can only be workingfilectx's initially).
720 """
724 """
721
725
722 if f1 == f2:
726 if f1 == f2:
723 return f1 # a match
727 return f1 # a match
724
728
725 g1, g2 = f1.ancestors(), f2.ancestors()
729 g1, g2 = f1.ancestors(), f2.ancestors()
726 try:
730 try:
727 f1r, f2r = f1.linkrev(), f2.linkrev()
731 f1r, f2r = f1.linkrev(), f2.linkrev()
728
732
729 if f1r is None:
733 if f1r is None:
730 f1 = next(g1)
734 f1 = next(g1)
731 if f2r is None:
735 if f2r is None:
732 f2 = next(g2)
736 f2 = next(g2)
733
737
734 while True:
738 while True:
735 f1r, f2r = f1.linkrev(), f2.linkrev()
739 f1r, f2r = f1.linkrev(), f2.linkrev()
736 if f1r > f2r:
740 if f1r > f2r:
737 f1 = next(g1)
741 f1 = next(g1)
738 elif f2r > f1r:
742 elif f2r > f1r:
739 f2 = next(g2)
743 f2 = next(g2)
740 elif f1 == f2:
744 elif f1 == f2:
741 return f1 # a match
745 return f1 # a match
742 elif f1r == f2r or f1r < limit or f2r < limit:
746 elif f1r == f2r or f1r < limit or f2r < limit:
743 return False # copy no longer relevant
747 return False # copy no longer relevant
744 except StopIteration:
748 except StopIteration:
745 return False
749 return False
746
750
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    seen = {f}
    # walk back through f's history, following renames: each ancestor's
    # path() tells us a former name ('of') of the file
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        ocr = oc.linkrev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2, tca.rev())
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

    # history walk ended without a full match; record what we learned
    if of in mta:
        if backwards or remotebase:
            data['incomplete'][of] = f
        else:
            for sf in seen:
                if sf in mb:
                    if tca == base:
                        data['diverge'].setdefault(sf, []).append(f)
                    else:
                        data['incompletediverge'][sf] = [of, f]
                    return
840
844
def duplicatecopies(repo, rev, fromrev, skiprev=None):
    '''reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    '''
    skipped = {}
    if skiprev is not None:
        copytrace = repo.ui.config('experimental', 'copytrace')
        if copytrace != 'off':
            # copytrace='off' skips this line, but not the entire function
            # because the line below is O(size of the repo) during a rebase,
            # while the rest of the function is much faster (and is required
            # for carrying copy metadata across the rebase anyway).
            skipped = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst in skipped:
            continue
        if repo.dirstate[dst] in "nma":
            repo.dirstate.copy(src, dst)
General Comments 0
You need to be logged in to leave comments. Login now