# upstream/mercurial-mirror Commit - r42445:d1c7446d
#
# copies.py - copy detection for Mercurial
#
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import heapq
import os

from .i18n import _

from . import (
    match as matchmod,
    node,
    pathutil,
    util,
)
from .utils import (
    stringutil,
)

def _findlimit(repo, ctxa, ctxb):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.

    'ctxa' and 'ctxb' are contexts; a context whose rev() is None is treated
    as the working directory (at most one of the two may be) and is walked
    through its p1()/p2() parents. Parent revisions of every other revision
    are read from repo.changelog.

    Returns a revision number: min(limit, a, b).
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    wdirparents = None
    a = ctxa.rev()
    b = ctxb.rev()
    if a is None:
        wdirparents = (ctxa.p1(), ctxa.p2())
        a = node.wdirrev
    if b is None:
        # only one of the two contexts may be the working directory
        assert not wdirparents
        wdirparents = (ctxb.p1(), ctxb.p2())
        b = node.wdirrev

    side = {a: -1, b: 1}
    # heapq is a min-heap; revs are stored negated so the highest rev is
    # popped first
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    limit = node.wdirrev

    while interesting:
        r = -heapq.heappop(visit)
        if r == node.wdirrev:
            parents = [pctx.rev() for pctx in wdirparents]
        else:
            parents = cl.parentrevs(r)
        if parents[1] == node.nullrev:
            # no second parent: only follow the first one
            parents = parents[:1]
        for p in parents:
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or vice-versa.
    return min(limit, a, b)

def _chain(src, dst, a, b):

110

def _chain(src, dst, a, b):

111

"""chain two sets of copies 'a' and 'b'"""

111

"""chain two sets of copies 'a' and 'b'"""

112

113

# When chaining copies in 'a' (from 'src' via some other commit 'mid') with

113

# When chaining copies in 'a' (from 'src' via some other commit 'mid') with

114

# copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the

114

# copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the

115

# following table (not including trivial cases). For example, case 2 is

115

# following table (not including trivial cases). For example, case 2 is

116

# where a file existed in 'src' and remained under that name in 'mid' and

116

# where a file existed in 'src' and remained under that name in 'mid' and

117

# then was renamed between 'mid' and 'dst'.

117

# then was renamed between 'mid' and 'dst'.

118

#

118

#

119

# case src mid dst result

119

# case src mid dst result

120

# 1 x y - -

120

# 1 x y - -

121

# 2 x y y x->y

121

# 2 x y y x->y

122

# 3 x y x -

122

# 3 x y x -

123

# 4 x y z x->z

123

# 4 x y z x->z

124

# 5 - x y -

124

# 5 - x y -

125

# 6 x x y x->y

125

# 6 x x y x->y

126

127

# Initialize result ('t') from 'a'. This catches cases 1 & 2. We'll remove

127

# Initialize result ('t') from 'a'. This catches cases 1 & 2. We'll remove

128

# case 1 later. We'll also catch cases 3 & 4 here. Case 4 will be

128

# case 1 later. We'll also catch cases 3 & 4 here. Case 4 will be

129

# overwritten later, and case 3 will be removed later.

129

# overwritten later, and case 3 will be removed later.

130

t = a.copy()

130

t = a.copy()

131

for k, v in b.iteritems():

131

for k, v in b.iteritems():

132

if v in t:

132

if v in t:

133

# Found a chain, i.e. cases 3 & 4. We'll remove case 3 later.

133

# Found a chain, i.e. cases 3 & 4. We'll remove case 3 later.

134

t[k] = t[v]

134

t[k] = t[v]

135

else:

135

else:

136

# Renamed only in 'b', i.e. cases 5 & 6. We'll remove case 5 later.

136

# Renamed only in 'b', i.e. cases 5 & 6. We'll remove case 5 later.

137

t[k] = v

137

t[k] = v

138

139

for k, v in list(t.items()):

139

for k, v in list(t.items()):

140

# remove copies from files that didn't exist, i.e. case 5

140

# remove copies from files that didn't exist, i.e. case 5

141

if v not in src:

141

if v not in src:

142

del t[k]

142

del t[k]

143

# remove criss-crossed copies, i.e. case 3

143

# remove criss-crossed copies, i.e. case 3

144

elif k in src and v in dst:

144

elif k in src and v in dst:

145

del t[k]

145

del t[k]

146

# remove copies to files that were then removed, i.e. case 1

146

# remove copies to files that were then removed, i.e. case 1

147

# and file 'y' in cases 3 & 4 (in case of rename)

147

# and file 'y' in cases 3 & 4 (in case of rename)

148

elif k not in dst:

148

elif k not in dst:

149

del t[k]

149

del t[k]

150

151

return t

151

return t

152

153

def _tracefile(fctx, am, limit):

153

def _tracefile(fctx, am, limit):

154

"""return file context that is the ancestor of fctx present in ancestor

154

"""return file context that is the ancestor of fctx present in ancestor

155

manifest am, stopping after the first ancestor lower than limit"""

155

manifest am, stopping after the first ancestor lower than limit"""

156

157

for f in fctx.ancestors():

157

for f in fctx.ancestors():

158

if am.get(f.path(), None) == f.filenode():

158

if am.get(f.path(), None) == f.filenode():

159

return f

159

return f

160

if not f.isintroducedafter(limit):

160

if not f.isintroducedafter(limit):

161

return None

161

return None

162

163

def _dirstatecopies(repo, match=None):

163

def _dirstatecopies(repo, match=None):

164

ds = repo.dirstate

164

ds = repo.dirstate

165

c = ds.copies().copy()

165

c = ds.copies().copy()

166

for k in list(c):

166

for k in list(c):

167

if ds[k] not in 'anm' or (match and not match(k)):

167

if ds[k] not in 'anm' or (match and not match(k)):

168

del c[k]

168

del c[k]

169

return c

169

return c

170

171

def _computeforwardmissing(a, b, match=None):

171

def _computeforwardmissing(a, b, match=None):

172

"""Computes which files are in b but not a.

172

"""Computes which files are in b but not a.

173

This is its own function so extensions can easily wrap this call to see what

173

This is its own function so extensions can easily wrap this call to see what

174

files _forwardcopies is about to process.

174

files _forwardcopies is about to process.

175

"""

175

"""

176

ma = a.manifest()

176

ma = a.manifest()

177

mb = b.manifest()

177

mb = b.manifest()

178

return mb.filesnotin(ma, match=match)

178

return mb.filesnotin(ma, match=match)

179

180

def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms

    True when the 'experimental.copies.read-from' config value is either
    'changeset-only' or 'compatibility'.
    """
    return (repo.ui.config('experimental', 'copies.read-from') in
            ('changeset-only', 'compatibility'))

def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {file in b: path of its traced origin in a} dict. When
    usechangesetcentricalgo() is true for the repo, the work is delegated to
    _changesetforwardcopies().
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    # NOTE(review): the debug messages below are reproduced as they appear in
    # the extracted text, where runs of spaces were collapsed; confirm their
    # exact spacing against upstream if the debug output is compared verbatim.
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: looking into rename from %s to %s\n'
            % (a, b))
    limit = _findlimit(repo, a, b)
    if debug:
        dbg('debug.copies: search limit: %d\n' % limit)
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # computed once and shared by every file context traced below
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg('debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg('debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            if debug:
                dbg('debug.copies: rename of: %s\n' % ofctx._path)
            cm[f] = ofctx.path()
        if debug:
            dbg('debug.copies: time: %f seconds\n'
                % (util.timer() - start))
    return cm

def _changesetforwardcopies(a, b, match):
    """find {dst@b: src@a} copies using the per-changeset copy data

    Walks the revisions returned by
    changelog.findmissingrevs(common=[a.rev()], heads=[b.rev()]) in
    increasing revision order, chaining each changeset's p1copies() /
    p2copies() onto the copies accumulated so far, and returns the mapping
    reached at b. Returns {} when a is the null revision.
    """
    if a.rev() == node.nullrev:
        return {}

    repo = a.repo()
    children = {}
    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    # map each non-null parent to the missing revs that have it as a parent
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)

    # parents of missing revs that are not themselves missing
    roots = set(children) - set(missingrevs)
    # 'work' contains 3-tuples of a (revision number, parent number, copies).
    # The parent number is only used for knowing which parent the copies dict
    # came from.
    work = [(r, 1, {}) for r in roots]
    heapq.heapify(work)
    while work:
        r, i1, copies1 = heapq.heappop(work)
        if work and work[0][0] == r:
            # We are tracing copies from both parents
            r, i2, copies2 = heapq.heappop(work)
            copies = {}
            ctx = repo[r]
            p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
            allcopies = set(copies1) | set(copies2)
            # TODO: perhaps this filtering should be done as long as ctx
            # is merge, whether or not we're tracing from both parent.
            for dst in allcopies:
                if not match(dst):
                    continue
                if dst not in copies2:
                    # Copied on p1 side: mark as copy from p1 side if it didn't
                    # already exist on p2 side
                    if dst not in p2man:
                        copies[dst] = copies1[dst]
                elif dst not in copies1:
                    # Copied on p2 side: mark as copy from p2 side if it didn't
                    # already exist on p1 side
                    if dst not in p1man:
                        copies[dst] = copies2[dst]
                else:
                    # Copied on both sides: mark as copy from p1 side
                    copies[dst] = copies1[dst]
        else:
            copies = copies1
        if r == b.rev():
            return copies
        for c in children[r]:
            childctx = repo[c]
            if r == childctx.p1().rev():
                parent = 1
                childcopies = childctx.p1copies()
            else:
                assert r == childctx.p2().rev()
                parent = 2
                childcopies = childctx.p2copies()
            if not match.always():
                childcopies = {dst: src for dst, src in childcopies.items()
                               if match(dst)}
            childcopies = _chain(a, childctx, copies, childcopies)
            heapq.heappush(work, (c, parent, childcopies))
    # the loop is expected to return once it reaches b
    assert False

def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b

    'match' is first passed through a.repo().narrowmatch(). When b is the
    working copy (b.rev() is None), copies recorded in the dirstate are used
    directly if a is b's first parent, and otherwise chained onto the copies
    between a and b.p1().
    """

    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        if a == b.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(b._repo, match)

        cm = _committedforwardcopies(a, b.p1(), match)
        # combine copies from dirstate if necessary
        return _chain(a, b, cm, _dirstatecopies(b._repo, match))
    return _committedforwardcopies(a, b, match)

def _backwardrenames(a, b, match):

329

def _backwardrenames(a, b, match):

330

if a._repo.ui.config('experimental', 'copytrace') == 'off':

330

if a._repo.ui.config('experimental', 'copytrace') == 'off':

331

return {}

331

return {}

332

333

# Even though we're not taking copies into account, 1:n rename situations

333

# Even though we're not taking copies into account, 1:n rename situations

334

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

334

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

335

# arbitrarily pick one of the renames.

335

# arbitrarily pick one of the renames.

336

# We don't want to pass in "match" here, since that would filter

336

# We don't want to pass in "match" here, since that would filter

337

# the destination by it. Since we're reversing the copies, we want

337

# the destination by it. Since we're reversing the copies, we want

338

# to filter the source instead.

338

# to filter the source instead.

339

f = _forwardcopies(b, a)

339

f = _forwardcopies(b, a)

340

r = {}

340

r = {}

341

for k, v in sorted(f.iteritems()):

341

for k, v in sorted(f.iteritems()):

342

if match and not match(v):

342

if match and not match(v):

343

continue

343

continue

344

# remove copies

344

# remove copies

345

if v in a:

345

if v in a:

346

continue

346

continue

347

r[v] = k

347

r[v] = k

348

return r

348

return r

349

350

def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    Returns {} when x and y are equal or either is falsy. Otherwise the
    search mode depends on a = y.ancestor(x): forward when a == x, backward
    when a == y, and otherwise a chain of the backward renames from x to a
    with the forward copies from a to y.
    """
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    if debug:
        repo.ui.debug('debug.copies: searching copies from %s to %s\n'
                      % (x, y))
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug('debug.copies: search mode: forward\n')
        return _forwardcopies(x, y, match=match)
    if a == y:
        if debug:
            repo.ui.debug('debug.copies: search mode: backward\n')
        return _backwardrenames(x, y, match=match)
    if debug:
        repo.ui.debug('debug.copies: search mode: combined\n')
    return _chain(x, y, _backwardrenames(x, a, match=match),
                  _forwardcopies(a, y, match=match))

def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so comparing against False (rather than testing falsiness)
        # keeps copytracing on for values that are not booleans
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)

def _isfullcopytraceable(repo, c1, base):

452

def _isfullcopytraceable(repo, c1, base):

453

""" Checks that if base, source and destination are all no-public branches,

453

""" Checks that if base, source and destination are all no-public branches,

454

if yes let's use the full copytrace algorithm for increased capabilities

454

if yes let's use the full copytrace algorithm for increased capabilities

455

since it will be fast enough.

455

since it will be fast enough.

456

457

`experimental.copytrace.sourcecommitlimit` can be used to set a limit for

457

`experimental.copytrace.sourcecommitlimit` can be used to set a limit for

458

number of changesets from c1 to base such that if number of changesets are

458

number of changesets from c1 to base such that if number of changesets are

459

more than the limit, full copytracing algorithm won't be used.

459

more than the limit, full copytracing algorithm won't be used.

460

"""

460

"""

461

if c1.rev() is None:

461

if c1.rev() is None:

462

c1 = c1.p1()

462

c1 = c1.p1()

463

if c1.mutable() and base.mutable():

463

if c1.mutable() and base.mutable():

464

sourcecommitlimit = repo.ui.configint('experimental',

464

sourcecommitlimit = repo.ui.configint('experimental',

465

'copytrace.sourcecommitlimit')

465

'copytrace.sourcecommitlimit')

466

commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))

466

commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))

467

return commits < sourcecommitlimit

467

return commits < sourcecommitlimit

468

return False

468

return False

469

470

def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,

470

def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,

471

copy, renamedelete):

471

copy, renamedelete):

472

if src not in m2:

472

if src not in m2:

473

# deleted on side 2

473

# deleted on side 2

474

if src not in m1:

474

if src not in m1:

475

# renamed on side 1, deleted on side 2

475

# renamed on side 1, deleted on side 2

476

renamedelete[src] = dsts1

476

renamedelete[src] = dsts1

477

elif m2[src] != mb[src]:

477

elif m2[src] != mb[src]:

478

if not _related(c2[src], base[src]):

478

if not _related(c2[src], base[src]):

479

return

479

return

480

# modified on side 2

480

# modified on side 2

481

for dst in dsts1:

481

for dst in dsts1:

482

if dst not in m2:

482

if dst not in m2:

483

# dst not added on side 2 (handle as regular

483

# dst not added on side 2 (handle as regular

484

# "both created" case in manifestmerge otherwise)

484

# "both created" case in manifestmerge otherwise)

485

copy[dst] = src

485

copy[dst] = src

486

487

def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple ``(copy, movewithdir, diverge, renamedelete, dirmove)``:

    - ``copy``: destination -> source for the copies that need merging;
    - ``movewithdir``: file -> new location, for files added on one side
      inside a directory that was moved as a whole;
    - ``diverge``: source -> sorted destinations, for files renamed on both
      sides to disjoint sets of names;
    - ``renamedelete``: source -> destinations, for files renamed on one
      side and deleted on the other;
    - ``dirmove``: "srcdir/" -> "dstdir/" for the detected directory moves.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert the dst -> src maps so every source lists its destinations
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in (dsts1 & dsts2):
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in (dsts1 & dsts2):
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
                                   copy, renamedelete)
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
                                   copy, renamedelete)

    # flattened destination sets, only used to annotate the debug output
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = " unmatched files in %s"
    if u1:
        repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
    if u2:
        repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        # no copy at all, so there cannot be any directory move either
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.items():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    # a directory may have been recorded before it was found to be invalid
    for i in invalid:
        dirmove.pop(i, None)
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # trailing slashes make the prefix test below match whole path components
    dirmove = {k + "/": v + "/" for k, v in dirmove.items()}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove

646

647

def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
                    (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same 5-tuple shape as _fullcopytracing(); when the
    heuristics are used only the first element (the destination -> source
    copy map) is populated.
    """

    # a context without a revision number is replaced by its first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    # walk the first-parent chain from c2 down to base, collecting every
    # file touched on the way
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # copies recorded in the source branch whose source still exists in c1
    cp = _forwardcopies(base, c2)
    for dst, src in cp.items():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = [f for f in changedfiles
                    if f not in m1 and f in base and f in c2]

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        # index the files added in c1 by basename and by directory name
        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (the value does not depend on the file, so read it only once)
        maxcandidates = repo.ui.configint('experimental',
                                          'copytrace.movecandidateslimit')

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}

747

748

def _related(f1, f2):

748

def _related(f1, f2):

749

"""return True if f1 and f2 filectx have a common ancestor

749

"""return True if f1 and f2 filectx have a common ancestor

750

751

Walk back to common ancestor to see if the two files originate

751

Walk back to common ancestor to see if the two files originate

752

from the same file. Since workingfilectx's rev() is None it messes

752

from the same file. Since workingfilectx's rev() is None it messes

753

up the integer comparison logic, hence the pre-step check for

753

up the integer comparison logic, hence the pre-step check for

754

None (f1 and f2 can only be workingfilectx's initially).

754

None (f1 and f2 can only be workingfilectx's initially).

755

"""

755

"""

756

757

if f1 == f2:

757

if f1 == f2:

758

return True # a match

758

return True # a match

759

760

g1, g2 = f1.ancestors(), f2.ancestors()

760

g1, g2 = f1.ancestors(), f2.ancestors()

761

try:

761

try:

762

f1r, f2r = f1.linkrev(), f2.linkrev()

762

f1r, f2r = f1.linkrev(), f2.linkrev()

763

764

if f1r is None:

764

if f1r is None:

765

f1 = next(g1)

765

f1 = next(g1)

766

if f2r is None:

766

if f2r is None:

767

f2 = next(g2)

767

f2 = next(g2)

768

769

while True:

769

while True:

770

f1r, f2r = f1.linkrev(), f2.linkrev()

770

f1r, f2r = f1.linkrev(), f2.linkrev()

771

if f1r > f2r:

771

if f1r > f2r:

772

f1 = next(g1)

772

f1 = next(g1)

773

elif f2r > f1r:

773

elif f2r > f1r:

774

f2 = next(g2)

774

f2 = next(g2)

775

else: # f1 and f2 point to files in the same linkrev

775

else: # f1 and f2 point to files in the same linkrev

776

return f1 == f2 # true if they point to the same file

776

return f1 == f2 # true if they point to the same file

777

except StopIteration:

777

except StopIteration:

778

return False

778

return False

779

780

def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """Reproduce copies from fromrev to rev in the dirstate.

    Every copy reported by pathcopies(repo[fromrev], repo[rev]) is recorded
    through wctx[dst].markcopied(src).

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config('experimental', 'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    if (skiprev is not None and
        (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    # items() instead of the Python 2-only iteritems(); the mapping is not
    # modified while it is being iterated.
    for dst, src in pathcopies(repo[fromrev], repo[rev]).items():
        if dst in exclude:
            continue
        wctx[dst].markcopied(src)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import collections
             import heapq
             import os
             from .i18n import _
             from . import (
                 match as matchmod,
                 node,
                 pathutil,
                 util,
             )
             from .utils import (
                 stringutil,
             )
             def _findlimit(repo, ctxa, ctxb):
                 """
                 Find the last revision that needs to be checked to ensure that a full
                 transitive closure for file copies can be properly calculated.
                 Generally, this means finding the earliest revision number that's an
                 ancestor of a or b but not both, except when a or b is a direct descendent
                 of the other, in which case we can return the minimum revnum of a and b.

                 repo supplies the changelog used to look up parent revisions.  ctxa and
                 ctxb are the two contexts being compared; a context whose rev() is None
                 is treated as the working directory (node.wdirrev) and its parents are
                 taken from p1()/p2().  At most one of the two may be such a context.

                 Returns a revision number.
                 """
                 # basic idea:
                 # - mark a and b with different sides
                 # - if a parent's children are all on the same side, the parent is
                 #   on that side, otherwise it is on no side
                 # - walk the graph in topological order with the help of a heap;
                 #   - add unseen parents to side map
                 #   - clear side of any parent that has children on different sides
                 #   - track number of interesting revs that might still be on a side
                 #   - track the lowest interesting rev seen
                 #   - quit when interesting revs is zero
                 cl = repo.changelog
                 wdirparents = None
                 a = ctxa.rev()
                 b = ctxb.rev()
                 if a is None:
                     wdirparents = (ctxa.p1(), ctxa.p2())
                     a = node.wdirrev
                 if b is None:
                     # only one of the two contexts may lack a revision number
                     assert not wdirparents
                     wdirparents = (ctxb.p1(), ctxb.p2())
                     b = node.wdirrev
                 # side: rev -> -1 (a's side), 1 (b's side) or 0 (no side)
                 side = {a: -1, b: 1}
                 # revs are stored negated so that heapq (a min-heap) pops the
                 # highest revision number first
                 visit = [-a, -b]
                 heapq.heapify(visit)
                 interesting = len(visit)
                 limit = node.wdirrev
                 while interesting:
                     r = -heapq.heappop(visit)
                     if r == node.wdirrev:
                         parents = [pctx.rev() for pctx in wdirparents]
                     else:
                         parents = cl.parentrevs(r)
                     # drop a null second parent
                     if parents[1] == node.nullrev:
                         parents = parents[:1]
                     for p in parents:
                         if p not in side:
                             # first time we see p; add it to visit
                             side[p] = side[r]
                             if side[p]:
                                 interesting += 1
                             heapq.heappush(visit, -p)
                         elif side[p] and side[p] != side[r]:
                             # p was interesting but now we know better
                             side[p] = 0
                             interesting -= 1
                     if side[r]:
                         limit = r # lowest rev visited
                         interesting -= 1
                 # Consider the following flow (see test-commit-amend.t under issue4405):
                 # 1/ File 'a0' committed
                 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
                 # 3/ Move back to first commit
                 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
                 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
                 #
                 # During the amend in step five, we will be in this state:
                 #
                 # @  3 temporary amend commit for a1-amend
                 # |
                 # o  2 a1-amend
                 # |
                 # | o  1 a1
                 # |/
                 # o  0 a0
                 #
                 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
                 # yet the filelog has the copy information in rev 1 and we will not look
                 # back far enough unless we also look at the a and b as candidates.
                 # This only occurs when a is a descendent of b or vice versa.
                 return min(limit, a, b)
             def _chain(src, dst, a, b):
                 """chain two sets of copies 'a' and 'b'"""
                 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
                 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
                 # following table (not including trivial cases). For example, case 2 is
                 # where a file existed in 'src' and remained under that name in 'mid' and
                 # then was renamed between 'mid' and 'dst'.
                 #
                 # case src mid dst result
                 #   1   x   y   -    -
                 #   2   x   y   y   x->y
                 #   3   x   y   x    -
                 #   4   x   y   z   x->z
                 #   5   -   x   y    -
                 #   6   x   x   y   x->y
                 # Initialize result ('t') from 'a'. This catches cases 1 & 2. We'll remove
                 # case 1 later. We'll also catch cases 3 & 4 here. Case 4 will be
                 # overwritten later, and case 3 will be removed later.
                 t = a.copy()
                 for k, v in b.iteritems():
                     if v in t:
                         # Found a chain, i.e. cases 3 & 4. We'll remove case 3 later.
                         t[k] = t[v]
                     else:
                         # Renamed only in 'b', i.e. cases 5 & 6. We'll remove case 5 later.
                         t[k] = v
                 for k, v in list(t.items()):
                     # remove copies from files that didn't exist, i.e. case 5
                     if v not in src:
                         del t[k]
                     # remove criss-crossed copies, i.e. case 3
                     elif k in src and v in dst:
                         del t[k]
                     # remove copies to files that were then removed, i.e. case 1
                     # and file 'y' in cases 3 & 4 (in case of rename)
                     elif k not in dst:
                         del t[k]
                 return t
             def _tracefile(fctx, am, limit):
                 """return file context that is the ancestor of fctx present in ancestor
                 manifest am, stopping after the first ancestor lower than limit"""
                 for f in fctx.ancestors():
                     if am.get(f.path(), None) == f.filenode():
                         return f
                     if not f.isintroducedafter(limit):
                         return None
             def _dirstatecopies(repo, match=None):
                 ds = repo.dirstate
                 c = ds.copies().copy()
                 for k in list(c):
                     if ds[k] not in 'anm' or (match and not match(k)):
                         del c[k]
                 return c
             def _computeforwardmissing(a, b, match=None):
                 """Computes which files are in b but not a.
                 This is its own function so extensions can easily wrap this call to see what
                 files _forwardcopies is about to process.
                 """
                 ma = a.manifest()
                 mb = b.manifest()
                 return mb.filesnotin(ma, match=match)
             def usechangesetcentricalgo(repo):
                 """Tell whether the changeset-centric copy algorithms should be used.

                 True when the 'experimental.copies.read-from' config value is either
                 'changeset-only' or 'compatibility'.
                 """
                 readfrom = repo.ui.config('experimental', 'copies.read-from')
                 return readfrom in ('changeset-only', 'compatibility')
             def _committedforwardcopies(a, b, match):
                 """Variant of _forwardcopies() where b.rev() is never None.

                 Returns a {dst: src} dict for the files that are in b but not in a and
                 that _tracefile() can follow back to an entry of a's manifest.  When
                 the changeset-centric algorithm is configured, the work is delegated
                 to _changesetforwardcopies() instead.
                 """
                 srcrepo = a._repo
                 if usechangesetcentricalgo(srcrepo):
                     return _changesetforwardcopies(a, b, match)

                 verbose = (srcrepo.ui.debugflag
                            and srcrepo.ui.configbool('devel', 'debug.copies'))
                 log = srcrepo.ui.debug
                 if verbose:
                     log('debug.copies:    looking into rename from %s to %s\n'
                         % (a, b))

                 # Files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that.
                 searchlimit = _findlimit(srcrepo, a, b)
                 if verbose:
                     log('debug.copies:      search limit: %d\n' % searchlimit)
                 basemanifest = a.manifest()

                 # Comparing two whole manifests is expensive on large repositories.
                 # In the common case of a commit versus its only parent (rebase,
                 # histedit, ...) the search is narrowed to the files the commit
                 # touched.  Merge commits are left out because their ctx.files() is
                 # not correct for this comparison.
                 missingmatch = match
                 if b.p1() == a and b.p2().node() == node.nullid:
                     touched = matchmod.exact(b.files())
                     missingmatch = matchmod.intersectmatchers(match, touched)
                 candidates = _computeforwardmissing(a, b, match=missingmatch)

                 ancestry = srcrepo.changelog.ancestors([b.rev()], inclusive=True)
                 if verbose:
                     log('debug.copies:      missing files to search: %d\n'
                         % len(candidates))

                 # Find where new files came from.  We currently don't try to find
                 # where old files went (too expensive), which means a case like
                 # 'hg rm b; hg cp a b' can be missed.
                 found = {}
                 for name in sorted(candidates):
                     if verbose:
                         log('debug.copies:        tracing file: %s\n' % name)
                     newfctx = b[name]
                     newfctx._ancestrycontext = ancestry

                     if verbose:
                         started = util.timer()
                     origin = _tracefile(newfctx, basemanifest, searchlimit)
                     if origin:
                         if verbose:
                             log('debug.copies:          rename of: %s\n'
                                 % origin._path)
                         found[name] = origin.path()
                     if verbose:
                         log('debug.copies:          time: %f seconds\n'
                             % (util.timer() - started))
                 return found
             def _changesetforwardcopies(a, b, match):
                 """Compute forward copies from a to b using changeset-level copy data.

                 Copies are propagated child by child, starting from the revisions that
                 are parents of a missing revision without being missing themselves,
                 and chaining each child's p1copies()/p2copies() onto what was gathered
                 so far.  The dict accumulated when b.rev() is reached is returned.  An
                 empty dict is returned when a is the null revision.
                 """
                 if a.rev() == node.nullrev:
                     return {}

                 repo = a.repo()
                 changelog = repo.changelog
                 missing = changelog.findmissingrevs(common=[a.rev()], heads=[b.rev()])

                 # parent rev -> missing revs having it as a (non-null) parent
                 childrenof = {}
                 for rev in missing:
                     for prev in changelog.parentrevs(rev):
                         if prev != node.nullrev:
                             childrenof.setdefault(prev, []).append(rev)

                 startrevs = set(childrenof) - set(missing)
                 # 'queue' contains 3-tuples of (revision number, parent number, copies).
                 # The parent number is only used for knowing which parent the copies
                 # dict came from.
                 queue = [(rev, 1, {}) for rev in startrevs]
                 heapq.heapify(queue)
                 while queue:
                     rev, _unused1, fromfirst = heapq.heappop(queue)
                     if not (queue and queue[0][0] == rev):
                         current = fromfirst
                     else:
                         # We are tracing copies from both parents
                         rev, _unused2, fromsecond = heapq.heappop(queue)
                         current = {}
                         mergectx = repo[rev]
                         p1man = mergectx.p1().manifest()
                         p2man = mergectx.p2().manifest()
                         # TODO: perhaps this filtering should be done as long as ctx
                         # is merge, whether or not we're tracing from both parent.
                         for dst in set(fromfirst) | set(fromsecond):
                             if not match(dst):
                                 continue
                             infirst = dst in fromfirst
                             insecond = dst in fromsecond
                             if infirst and insecond:
                                 # Copied on both sides: mark as copy from p1 side
                                 current[dst] = fromfirst[dst]
                             elif not insecond:
                                 # Copied on p1 side: mark as copy from p1 side if it
                                 # didn't already exist on p2 side
                                 if dst not in p2man:
                                     current[dst] = fromfirst[dst]
                             else:
                                 # Copied on p2 side: mark as copy from p2 side if it
                                 # didn't already exist on p1 side
                                 if dst not in p1man:
                                     current[dst] = fromsecond[dst]
                     if rev == b.rev():
                         return current
                     for child in childrenof[rev]:
                         childctx = repo[child]
                         if rev == childctx.p1().rev():
                             whichparent = 1
                             newcopies = childctx.p1copies()
                         else:
                             assert rev == childctx.p2().rev()
                             whichparent = 2
                             newcopies = childctx.p2copies()
                         if not match.always():
                             newcopies = {dst: src for dst, src in newcopies.items()
                                          if match(dst)}
                         newcopies = _chain(a, childctx, current, newcopies)
                         heapq.heappush(queue, (child, whichparent, newcopies))
                 # the loop is expected to return once b.rev() has been processed
                 assert False
             def _forwardcopies(a, b, match=None):
                 """Find the {dst@b: src@a} copy mapping where a is an ancestor of b.

                 match is first passed through the repository's narrowmatch().  When b
                 has no revision number (working copy), the committed copies up to
                 b.p1() are chained with the dirstate copies.
                 """
                 match = a.repo().narrowmatch(match)
                 if b.rev() is not None:
                     return _committedforwardcopies(a, b, match)

                 # b is the working copy
                 if a == b.p1():
                     # short-circuit to avoid issues with merge states
                     return _dirstatecopies(b._repo, match)
                 committed = _committedforwardcopies(a, b.p1(), match)
                 # combine copies from dirstate if necessary
                 pending = _dirstatecopies(b._repo, match)
                 return _chain(a, b, committed, pending)
             def _backwardrenames(a, b, match):
                 if a._repo.ui.config('experimental', 'copytrace') == 'off':
                     return {}
                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
                 # We don't want to pass in "match" here, since that would filter
                 # the destination by it. Since we're reversing the copies, we want
                 # to filter the source instead.
                 f = _forwardcopies(b, a)
                 r = {}
                 for k, v in sorted(f.iteritems()):
                     if match and not match(v):
                         continue
                     # remove copies
                     if v in a:
                         continue
                     r[v] = k
                 return r
             def pathcopies(x, y, match=None):
                 """Find the {dst@y: src@x} copy mapping for a directed compare.

                 Depending on how y.ancestor(x) relates to x and y, the search runs
                 forward (the ancestor is x), backward (the ancestor is y) or as a
                 backward search to the ancestor chained with a forward search from it.
                 An empty dict is returned when x == y or when either context is falsy.
                 """
                 ui = x._repo.ui
                 verbose = ui.debugflag and ui.configbool('devel', 'debug.copies')
                 if verbose:
                     ui.debug('debug.copies: searching copies from %s to %s\n'
                              % (x, y))
                 if x == y or not x or not y:
                     return {}

                 anc = y.ancestor(x)
                 if anc == x:
                     if verbose:
                         ui.debug('debug.copies: search mode: forward\n')
                     return _forwardcopies(x, y, match=match)
                 if anc == y:
                     if verbose:
                         ui.debug('debug.copies: search mode: backward\n')
                     return _backwardrenames(x, y, match=match)

                 if verbose:
                     ui.debug('debug.copies: search mode: combined\n')
                 backward = _backwardrenames(x, anc, match=match)
                 forward = _forwardcopies(anc, y, match=match)
                 return _chain(x, y, backward, forward)
             def mergecopies(repo, c1, c2, base):
                 """
                 Find the moves and copies between contexts c1 and c2 that matter for a
                 merge, using 'base' as the merge base.

                 Copytracing is what lets commands such as rebase, merge and unshelve
                 merge a file that one merge parent moved or copied while the other
                 parent modified it.  For example:

                 o          ---> 4 another commit
                 |
                 |   o      ---> 3 commit that modifies a.txt
                 |  /
                 o /        ---> 2 commit that moves a.txt to b.txt
                 |/
                 o          ---> 1 merge base

                 Rebasing revision 3 onto revision 4 finds no a.txt in revision 4; with
                 copytrace disabled the user gets the following message:

                 ```other changed <file> which local deleted```

                 Five dicts are returned, in this order: "copy", "movewithdir",
                 "diverge", "renamedelete" and "dirmove".

                 "copy" maps destination name -> source name, where the source is in c1
                 and the destination is in c2 or vice-versa.

                 "movewithdir" maps source name -> destination name, for a file present
                 at source in one context but not the other that the merge process has
                 to move to destination because the other context moved the directory
                 it is in.

                 "diverge" maps source name -> list of destination names for divergent
                 renames.

                 "renamedelete" maps source name -> list of destination names for files
                 deleted in c1 that were renamed in c2 or vice-versa.

                 "dirmove" maps detected source dir -> destination dir renames.  It is
                 needed for handling changes to new files previously grafted into
                 renamed directories.

                 Which copytracing algorithm runs depends on the configuration.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return {}, {}, {}, {}, {}

                 narrowmatch = c1.repo().narrowmatch()

                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

                 # Copy trace disabling is explicitly below the node == p1 logic above
                 # because the logic above is required for a simple copy to be kept
                 # across a rebase.
                 mode = repo.ui.config('experimental', 'copytrace')
                 if stringutil.parsebool(mode) is False:
                     # stringutil.parsebool() returns None when it is unable to parse
                     # the value, so only an explicit "false" value turns tracing off
                     return {}, {}, {}, {}, {}

                 if usechangesetcentricalgo(repo):
                     # The heuristics don't make sense when we need changeset-centric
                     # algos
                     return _fullcopytracing(repo, c1, c2, base)

                 # With heuristics configured, full copytracing is still used when only
                 # non-public revisions are involved: it will be fast enough and also
                 # covers the copies which the heuristics could miss.
                 if mode == 'heuristics' and not _isfullcopytraceable(repo, c1, base):
                     return _heuristicscopytracing(repo, c1, c2, base)
                 return _fullcopytracing(repo, c1, c2, base)
             def _isfullcopytraceable(repo, c1, base):
                 """ Checks that if base, source and destination are all no-public branches,
                 if yes let's use the full copytrace algorithm for increased capabilities
                 since it will be fast enough.
                 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
                 number of changesets from c1 to base such that if number of changesets are
                 more than the limit, full copytracing algorithm won't be used.
                 """
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c1.mutable() and base.mutable():
                     sourcecommitlimit = repo.ui.configint('experimental',
                                                           'copytrace.sourcecommitlimit')
                     commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
                     return commits < sourcecommitlimit
                 return False
             def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
                                        copy, renamedelete):
                 if src not in m2:
                     # deleted on side 2
                     if src not in m1:
                         # renamed on side 1, deleted on side 2
                         renamedelete[src] = dsts1
                 elif m2[src] != mb[src]:
                     if not _related(c2[src], base[src]):
                         return
                     # modified on side 2
                     for dst in dsts1:
                         if dst not in m2:
                             # dst not added on side 2 (handle as regular
                             # "both created" case in manifestmerge otherwise)
                             copy[dst] = src
             def _fullcopytracing(repo, c1, c2, base):
                 """ The full copytracing algorithm which finds all the new files that were
                 added from merge base up to the top commit and for each file it checks if
                 this file was copied from another file.

                 This is pretty slow when a lot of changesets are involved but will track all
                 the copies.

                 c1 and c2 are the two contexts being merged and base is the merge base.
                 Returns the 5-tuple (copy, movewithdir, diverge, renamedelete, dirmove);
                 movewithdir and dirmove are empty dicts when no copy or no directory
                 move was found.
                 """
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 mb = base.manifest()
                 copies1 = pathcopies(base, c1)
                 copies2 = pathcopies(base, c2)
                 # invert {dst: src} into {src: [dst, ...]} for each side
                 inversecopies1 = {}
                 inversecopies2 = {}
                 for dst, src in copies1.items():
                     inversecopies1.setdefault(src, []).append(dst)
                 for dst, src in copies2.items():
                     inversecopies2.setdefault(src, []).append(dst)
                 copy = {}
                 diverge = {}
                 renamedelete = {}
                 allsources = set(inversecopies1) | set(inversecopies2)
                 for src in allsources:
                     dsts1 = inversecopies1.get(src)
                     dsts2 = inversecopies2.get(src)
                     if dsts1 and dsts2:
                         # copied/renamed on both sides
                         if src not in m1 and src not in m2:
                             # renamed on both sides
                             dsts1 = set(dsts1)
                             dsts2 = set(dsts2)
                             # If there's some overlap in the rename destinations, we
                             # consider it not divergent. For example, if side 1 copies 'a'
                             # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                             # and 'd' and deletes 'a'.
                             if dsts1 & dsts2:
                                 for dst in (dsts1 & dsts2):
                                     copy[dst] = src
                             else:
                                 diverge[src] = sorted(dsts1 | dsts2)
                         elif src in m1 and src in m2:
                             # copied on both sides
                             dsts1 = set(dsts1)
                             dsts2 = set(dsts2)
                             for dst in (dsts1 & dsts2):
                                 copy[dst] = src
                         # TODO: Handle cases where it was renamed on one side and copied
                         # on the other side
                     elif dsts1:
                         # copied/renamed only on side 1
                         _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
                                                copy, renamedelete)
                     elif dsts2:
                         # copied/renamed only on side 2
                         _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
                                                copy, renamedelete)
                 # flat sets of destination names, only used for the debug notes below
                 renamedeleteset = set()
                 divergeset = set()
                 for dsts in diverge.values():
                     divergeset.update(dsts)
                 for dsts in renamedelete.values():
                     renamedeleteset.update(dsts)
                 # find interesting file sets from manifests
                 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
                 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
                 u1 = sorted(addedinm1 - addedinm2)
                 u2 = sorted(addedinm2 - addedinm1)
                 header = "  unmatched files in %s"
                 if u1:
                     repo.ui.debug("%s:\n   %s\n" % (header % 'local', "\n   ".join(u1)))
                 if u2:
                     repo.ui.debug("%s:\n   %s\n" % (header % 'other', "\n   ".join(u2)))
                 # on a name clash, the entry from side 2 replaces the one from side 1
                 fullcopy = copies1.copy()
                 fullcopy.update(copies2)
                 if not fullcopy:
                     return copy, {}, diverge, renamedelete, {}
                 if repo.ui.debugflag:
                     repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                                   "% = renamed and deleted):\n")
                     for f in sorted(fullcopy):
                         note = ""
                         if f in copy:
                             note += "*"
                         if f in divergeset:
                             note += "!"
                         if f in renamedeleteset:
                             note += "%"
                         repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                                           note))
                 del divergeset
                 repo.ui.debug("  checking for directory renames\n")
                 # generate a directory move map
                 d1, d2 = c1.dirs(), c2.dirs()
                 # Hack for adding '', which is not otherwise added, to d1 and d2
                 d1.addpath('/')
                 d2.addpath('/')
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 # (items() rather than the Python 2-only iteritems(); 'fullcopy' is
                 # not modified inside the loop)
                 for dst, src in fullcopy.items():
                     dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif dsrc in d1 and ddst in d1:
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc)
                     elif dsrc in d2 and ddst in d2:
                         # directory wasn't entirely moved remotely
                         invalid.add(dsrc)
                     elif dsrc in dirmove and dirmove[dsrc] != ddst:
                         # files from the same directory moved to two different places
                         invalid.add(dsrc)
                     else:
                         # looks good so far
                         dirmove[dsrc] = ddst
                 # a directory may have been recorded before it was found invalid
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del d1, d2, invalid
                 if not dirmove:
                     return copy, {}, diverge, renamedelete, {}
                 # append a trailing slash so the startswith() test below only matches
                 # whole directory components
                 dirmove = {k + "/": v + "/" for k, v in dirmove.items()}
                 for d in dirmove:
                     repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                                   (d, dirmove[d]))
                 movewithdir = {}
                 # check unaccounted nonoverlapping files against directory moves
                 for f in u1 + u2:
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d):]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(("   pending file src: '%s' -> "
                                                    "dst: '%s'\n") % (f, df))
                                 break
                 return copy, movewithdir, diverge, renamedelete, dirmove
             def _heuristicscopytracing(repo, c1, c2, base):
                 """Fast copytracing using filename heuristics

                 Assumes that moves or renames are of the following two types:

                 1) Inside a directory only (same directory name but different
                    filenames)
                 2) Move from one directory to another (same filenames but
                    different directory names)

                 Works only when there are no merge commits in the "source branch".
                 Source branch is commits from base up to c2 not including base.

                 If a merge is involved it falls back to _fullcopytracing().

                 Can be used by setting the following config:

                     [experimental]
                     copytrace = heuristics

                 In some cases the copy/move candidates found by heuristics can be
                 very large in number and that will make the algorithm slow. The
                 number of possible candidates to check can be limited by using the
                 config `experimental.copytrace.movecandidateslimit` which defaults
                 to 100.

                 Returns a 5-tuple. When the heuristics run to completion the first
                 item is a dict of detected copies and the remaining four items are
                 always empty dicts; when the function falls back, the result of
                 _fullcopytracing() is returned unchanged.
                 """
                 # A working-directory context has rev() None; use its first parent.
                 if c1.rev() is None:
                     c1 = c1.p1()
                 if c2.rev() is None:
                     c2 = c2.p1()

                 copies = {}
                 changedfiles = set()
                 m1 = c1.manifest()
                 if not repo.revs('%d::%d', base.rev(), c2.rev()):
                     # If base is not in c2 branch, we switch to fullcopytracing
                     repo.ui.debug("switching to full copytracing as base is not "
                                   "an ancestor of c2\n")
                     return _fullcopytracing(repo, c1, c2, base)

                 # Walk first parents from c2 back to base, collecting every file
                 # touched on the way.
                 ctx = c2
                 while ctx != base:
                     if len(ctx.parents()) == 2:
                         # To keep things simple let's not handle merges
                         repo.ui.debug("switching to full copytracing because of merges\n")
                         return _fullcopytracing(repo, c1, c2, base)
                     changedfiles.update(ctx.files())
                     ctx = ctx.p1()

                 # Keep only the recorded copies whose source still exists in c1.
                 cp = _forwardcopies(base, c2)
                 for dst, src in cp.iteritems():
                     if src in m1:
                         copies[dst] = src

                 # file is missing if it isn't present in the destination, but is
                 # present in the base and present in the source.
                 # Presence in the base is important to exclude added files, presence
                 # in the source is important to exclude removed files.
                 missingfiles = [f for f in changedfiles
                                 if f not in m1 and f in base and f in c2]

                 if missingfiles:
                     # Index the files that c1 has but base does not, both by
                     # basename and by directory name.
                     basenametofilename = collections.defaultdict(list)
                     dirnametofilename = collections.defaultdict(list)

                     for f in m1.filesnotin(base.manifest()):
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         basenametofilename[basename].append(f)
                         dirnametofilename[dirname].append(f)

                     # we can have a lot of candidates which can slow down the
                     # heuristics; this config value limits the number of candidate
                     # moves to check. It does not depend on the file being
                     # examined, so it is read once rather than once per file.
                     maxcandidates = repo.ui.configint('experimental',
                                                       'copytrace.movecandidateslimit')

                     for f in missingfiles:
                         basename = os.path.basename(f)
                         dirname = os.path.dirname(f)
                         samebasename = basenametofilename[basename]
                         samedirname = dirnametofilename[dirname]
                         movecandidates = samebasename + samedirname
                         # f is guaranteed to be present in c2, that's why
                         # c2.filectx(f) won't fail
                         f2 = c2.filectx(f)

                         if len(movecandidates) > maxcandidates:
                             repo.ui.status(_("skipping copytracing for '%s', more "
                                              "candidates than the limit: %d\n")
                                            % (f, len(movecandidates)))
                             continue

                         for candidate in movecandidates:
                             f1 = c1.filectx(candidate)
                             if _related(f1, f2):
                                 # if there are a few related copies then we'll merge
                                 # changes into all of them. This matches the behaviour
                                 # of upstream copytracing
                                 copies[candidate] = f

                 return copies, {}, {}, {}, {}
             def _related(f1, f2):
                 """return True if f1 and f2 filectx have a common ancestor
                 Walk back to common ancestor to see if the two files originate
                 from the same file. Since workingfilectx's rev() is None it messes
                 up the integer comparison logic, hence the pre-step check for
                 None (f1 and f2 can only be workingfilectx's initially).
                 """
                 if f1 == f2:
                     return True # a match
                 g1, g2 = f1.ancestors(), f2.ancestors()
                 try:
                     f1r, f2r = f1.linkrev(), f2.linkrev()
                     if f1r is None:
                         f1 = next(g1)
                     if f2r is None:
                         f2 = next(g2)
                     while True:
                         f1r, f2r = f1.linkrev(), f2.linkrev()
                         if f1r > f2r:
                             f1 = next(g1)
                         elif f2r > f1r:
                             f2 = next(g2)
                         else: # f1 and f2 point to files in the same linkrev
                             return f1 == f2 # true if they point to the same file
                 except StopIteration:
                     return False
             def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
                 """reproduce copies from fromrev to rev in the dirstate

                 If skiprev is specified, it's a revision that should be used to
                 filter copy records. Any copies that occur between fromrev and
                 skiprev will not be duplicated, even if they appear in the set of
                 copies between fromrev and rev.

                 Each remaining copy is recorded by calling
                 wctx[dst].markcopied(src); nothing is returned.
                 """
                 exclude = {}
                 ctraceconfig = repo.ui.config('experimental', 'copytrace')
                 bctrace = stringutil.parsebool(ctraceconfig)
                 if (skiprev is not None and
                     (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
                     # copytrace='off' skips this line, but not the entire function because
                     # the line below is O(size of the repo) during a rebase, while the rest
                     # of the function is much faster (and is required for carrying copy
                     # metadata across the rebase anyway).
                     exclude = pathcopies(repo[fromrev], repo[skiprev])
                 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
                     # Copies that already exist between fromrev and skiprev are
                     # filtered out.
                     if dst in exclude:
                         continue
                     wctx[dst].markcopied(src)