upstream/mercurial-mirror Commit - r23982:751d1138

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

import util

8

import util

9

import heapq

9

import heapq

10

11

def _nonoverlap(d1, d2, d3):

11

def _nonoverlap(d1, d2, d3):

12

"Return list of elements in d1 not in d2 or d3"

12

"Return list of elements in d1 not in d2 or d3"

13

return sorted([d for d in d1 if d not in d3 and d not in d2])

13

return sorted([d for d in d1 if d not in d3 and d not in d2])

14

15

def _dirname(f):

15

def _dirname(f):

16

s = f.rfind("/")

16

s = f.rfind("/")

17

if s == -1:

17

if s == -1:

18

return ""

18

return ""

19

return f[:s]

19

return f[:s]

20

21

def _findlimit(repo, a, b):

21

def _findlimit(repo, a, b):

22

"""

22

"""

23

Find the last revision that needs to be checked to ensure that a full

23

Find the last revision that needs to be checked to ensure that a full

24

transitive closure for file copies can be properly calculated.

24

transitive closure for file copies can be properly calculated.

25

Generally, this means finding the earliest revision number that's an

25

Generally, this means finding the earliest revision number that's an

26

ancestor of a or b but not both, except when a or b is a direct descendent

26

ancestor of a or b but not both, except when a or b is a direct descendent

27

of the other, in which case we can return the minimum revnum of a and b.

27

of the other, in which case we can return the minimum revnum of a and b.

28

None if no such revision exists.

28

None if no such revision exists.

29

"""

29

"""

30

31

# basic idea:

31

# basic idea:

32

# - mark a and b with different sides

32

# - mark a and b with different sides

33

# - if a parent's children are all on the same side, the parent is

33

# - if a parent's children are all on the same side, the parent is

34

# on that side, otherwise it is on no side

34

# on that side, otherwise it is on no side

35

# - walk the graph in topological order with the help of a heap;

35

# - walk the graph in topological order with the help of a heap;

36

# - add unseen parents to side map

36

# - add unseen parents to side map

37

# - clear side of any parent that has children on different sides

37

# - clear side of any parent that has children on different sides

38

# - track number of interesting revs that might still be on a side

38

# - track number of interesting revs that might still be on a side

39

# - track the lowest interesting rev seen

39

# - track the lowest interesting rev seen

40

# - quit when interesting revs is zero

40

# - quit when interesting revs is zero

41

42

cl = repo.changelog

42

cl = repo.changelog

43

working = len(cl) # pseudo rev for the working directory

43

working = len(cl) # pseudo rev for the working directory

44

if a is None:

44

if a is None:

45

a = working

45

a = working

46

if b is None:

46

if b is None:

47

b = working

47

b = working

48

49

side = {a: -1, b: 1}

49

side = {a: -1, b: 1}

50

visit = [-a, -b]

50

visit = [-a, -b]

51

heapq.heapify(visit)

51

heapq.heapify(visit)

52

interesting = len(visit)

52

interesting = len(visit)

53

hascommonancestor = False

53

hascommonancestor = False

54

limit = working

54

limit = working

55

56

while interesting:

56

while interesting:

57

r = -heapq.heappop(visit)

57

r = -heapq.heappop(visit)

58

if r == working:

58

if r == working:

59

parents = [cl.rev(p) for p in repo.dirstate.parents()]

59

parents = [cl.rev(p) for p in repo.dirstate.parents()]

60

else:

60

else:

61

parents = cl.parentrevs(r)

61

parents = cl.parentrevs(r)

62

for p in parents:

62

for p in parents:

63

if p < 0:

63

if p < 0:

64

continue

64

continue

65

if p not in side:

65

if p not in side:

66

# first time we see p; add it to visit

66

# first time we see p; add it to visit

67

side[p] = side[r]

67

side[p] = side[r]

68

if side[p]:

68

if side[p]:

69

interesting += 1

69

interesting += 1

70

heapq.heappush(visit, -p)

70

heapq.heappush(visit, -p)

71

elif side[p] and side[p] != side[r]:

71

elif side[p] and side[p] != side[r]:

72

# p was interesting but now we know better

72

# p was interesting but now we know better

73

side[p] = 0

73

side[p] = 0

74

interesting -= 1

74

interesting -= 1

75

hascommonancestor = True

75

hascommonancestor = True

76

if side[r]:

76

if side[r]:

77

limit = r # lowest rev visited

77

limit = r # lowest rev visited

78

interesting -= 1

78

interesting -= 1

79

80

if not hascommonancestor:

80

if not hascommonancestor:

81

return None

81

return None

82

83

# Consider the following flow (see test-commit-amend.t under issue4405):

83

# Consider the following flow (see test-commit-amend.t under issue4405):

84

# 1/ File 'a0' committed

84

# 1/ File 'a0' committed

85

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

85

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

86

# 3/ Move back to first commit

86

# 3/ Move back to first commit

87

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

87

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

88

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

88

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

89

#

89

#

90

# During the amend in step five, we will be in this state:

90

# During the amend in step five, we will be in this state:

91

#

91

#

92

# @ 3 temporary amend commit for a1-amend

92

# @ 3 temporary amend commit for a1-amend

93

# |

93

# |

94

# o 2 a1-amend

94

# o 2 a1-amend

95

# |

95

# |

96

# | o 1 a1

96

# | o 1 a1

97

# |/

97

# |/

98

# o 0 a0

98

# o 0 a0

99

#

99

#

100

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

100

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

101

# yet the filelog has the copy information in rev 1 and we will not look

101

# yet the filelog has the copy information in rev 1 and we will not look

102

# back far enough unless we also look at the a and b as candidates.

102

# back far enough unless we also look at the a and b as candidates.

103

# This only occurs when a is a descendent of b or visa-versa.

103

# This only occurs when a is a descendent of b or visa-versa.

104

return min(limit, a, b)

104

return min(limit, a, b)

105

106

def _chain(src, dst, a, b):

106

def _chain(src, dst, a, b):

107

'''chain two sets of copies a->b'''

107

'''chain two sets of copies a->b'''

108

t = a.copy()

108

t = a.copy()

109

for k, v in b.iteritems():

109

for k, v in b.iteritems():

110

if v in t:

110

if v in t:

111

# found a chain

111

# found a chain

112

if t[v] != k:

112

if t[v] != k:

113

# file wasn't renamed back to itself

113

# file wasn't renamed back to itself

114

t[k] = t[v]

114

t[k] = t[v]

115

if v not in dst:

115

if v not in dst:

116

# chain was a rename, not a copy

116

# chain was a rename, not a copy

117

del t[v]

117

del t[v]

118

if v in src:

118

if v in src:

119

# file is a copy of an existing file

119

# file is a copy of an existing file

120

t[k] = v

120

t[k] = v

121

122

# remove criss-crossed copies

122

# remove criss-crossed copies

123

for k, v in t.items():

123

for k, v in t.items():

124

if k in src and v in dst:

124

if k in src and v in dst:

125

del t[k]

125

del t[k]

126

127

return t

127

return t

128

129

def _tracefile(fctx, am, limit=-1):

129

def _tracefile(fctx, am, limit=-1):

130

'''return file context that is the ancestor of fctx present in ancestor

130

'''return file context that is the ancestor of fctx present in ancestor

131

manifest am, stopping after the first ancestor lower than limit'''

131

manifest am, stopping after the first ancestor lower than limit'''

132

133

for f in fctx.ancestors():

133

for f in fctx.ancestors():

134

if am.get(f.path(), None) == f.filenode():

134

if am.get(f.path(), None) == f.filenode():

135

return f

135

return f

136

if f.rev() < limit:

136

if limit >= 0 and f.linkrev() < limit and f.rev() < limit:

137

return None

137

return None

138

139

def _dirstatecopies(d):

139

def _dirstatecopies(d):

140

ds = d._repo.dirstate

140

ds = d._repo.dirstate

141

c = ds.copies().copy()

141

c = ds.copies().copy()

142

for k in c.keys():

142

for k in c.keys():

143

if ds[k] not in 'anm':

143

if ds[k] not in 'anm':

144

del c[k]

144

del c[k]

145

return c

145

return c

146

147

def _forwardcopies(a, b):

147

def _forwardcopies(a, b):

148

'''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

148

'''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

149

150

# check for working copy

150

# check for working copy

151

w = None

151

w = None

152

if b.rev() is None:

152

if b.rev() is None:

153

w = b

153

w = b

154

b = w.p1()

154

b = w.p1()

155

if a == b:

155

if a == b:

156

# short-circuit to avoid issues with merge states

156

# short-circuit to avoid issues with merge states

157

return _dirstatecopies(w)

157

return _dirstatecopies(w)

158

159

# files might have to be traced back to the fctx parent of the last

159

# files might have to be traced back to the fctx parent of the last

160

# one-side-only changeset, but not further back than that

160

# one-side-only changeset, but not further back than that

161

limit = _findlimit(a._repo, a.rev(), b.rev())

161

limit = _findlimit(a._repo, a.rev(), b.rev())

162

if limit is None:

162

if limit is None:

163

limit = -1

163

limit = -1

164

am = a.manifest()

164

am = a.manifest()

165

166

# find where new files came from

166

# find where new files came from

167

# we currently don't try to find where old files went, too expensive

167

# we currently don't try to find where old files went, too expensive

168

# this means we can miss a case like 'hg rm b; hg cp a b'

168

# this means we can miss a case like 'hg rm b; hg cp a b'

169

cm = {}

169

cm = {}

170

missing = set(b.manifest().iterkeys())

170

missing = set(b.manifest().iterkeys())

171

missing.difference_update(a.manifest().iterkeys())

171

missing.difference_update(a.manifest().iterkeys())

172

173

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

173

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

174

for f in missing:

174

for f in missing:

175

fctx = b[f]

175

fctx = b[f]

176

fctx._ancestrycontext = ancestrycontext

176

fctx._ancestrycontext = ancestrycontext

177

ofctx = _tracefile(fctx, am, limit)

177

ofctx = _tracefile(fctx, am, limit)

178

if ofctx:

178

if ofctx:

179

cm[f] = ofctx.path()

179

cm[f] = ofctx.path()

180

181

# combine copies from dirstate if necessary

181

# combine copies from dirstate if necessary

182

if w is not None:

182

if w is not None:

183

cm = _chain(a, w, cm, _dirstatecopies(w))

183

cm = _chain(a, w, cm, _dirstatecopies(w))

184

185

return cm

185

return cm

186

187

def _backwardrenames(a, b):

187

def _backwardrenames(a, b):

188

# Even though we're not taking copies into account, 1:n rename situations

188

# Even though we're not taking copies into account, 1:n rename situations

189

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

189

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

190

# arbitrarily pick one of the renames.

190

# arbitrarily pick one of the renames.

191

f = _forwardcopies(b, a)

191

f = _forwardcopies(b, a)

192

r = {}

192

r = {}

193

for k, v in sorted(f.iteritems()):

193

for k, v in sorted(f.iteritems()):

194

# remove copies

194

# remove copies

195

if v in a:

195

if v in a:

196

continue

196

continue

197

r[v] = k

197

r[v] = k

198

return r

198

return r

199

200

def pathcopies(x, y):

200

def pathcopies(x, y):

201

'''find {dst@y: src@x} copy mapping for directed compare'''

201

'''find {dst@y: src@x} copy mapping for directed compare'''

202

if x == y or not x or not y:

202

if x == y or not x or not y:

203

return {}

203

return {}

204

a = y.ancestor(x)

204

a = y.ancestor(x)

205

if a == x:

205

if a == x:

206

return _forwardcopies(x, y)

206

return _forwardcopies(x, y)

207

if a == y:

207

if a == y:

208

return _backwardrenames(x, y)

208

return _backwardrenames(x, y)

209

return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))

209

return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))

210

211

def mergecopies(repo, c1, c2, ca):

211

def mergecopies(repo, c1, c2, ca):

212

"""

212

"""

213

Find moves and copies between context c1 and c2 that are relevant

213

Find moves and copies between context c1 and c2 that are relevant

214

for merging.

214

for merging.

215

216

Returns four dicts: "copy", "movewithdir", "diverge", and

216

Returns four dicts: "copy", "movewithdir", "diverge", and

217

"renamedelete".

217

"renamedelete".

218

219

"copy" is a mapping from destination name -> source name,

219

"copy" is a mapping from destination name -> source name,

220

where source is in c1 and destination is in c2 or vice-versa.

220

where source is in c1 and destination is in c2 or vice-versa.

221

222

"movewithdir" is a mapping from source name -> destination name,

222

"movewithdir" is a mapping from source name -> destination name,

223

where the file at source present in one context but not the other

223

where the file at source present in one context but not the other

224

needs to be moved to destination by the merge process, because the

224

needs to be moved to destination by the merge process, because the

225

other context moved the directory it is in.

225

other context moved the directory it is in.

226

227

"diverge" is a mapping of source name -> list of destination names

227

"diverge" is a mapping of source name -> list of destination names

228

for divergent renames.

228

for divergent renames.

229

230

"renamedelete" is a mapping of source name -> list of destination

230

"renamedelete" is a mapping of source name -> list of destination

231

names for files deleted in c1 that were renamed in c2 or vice-versa.

231

names for files deleted in c1 that were renamed in c2 or vice-versa.

232

"""

232

"""

233

# avoid silly behavior for update from empty dir

233

# avoid silly behavior for update from empty dir

234

if not c1 or not c2 or c1 == c2:

234

if not c1 or not c2 or c1 == c2:

235

return {}, {}, {}, {}

235

return {}, {}, {}, {}

236

237

# avoid silly behavior for parent -> working dir

237

# avoid silly behavior for parent -> working dir

238

if c2.node() is None and c1.node() == repo.dirstate.p1():

238

if c2.node() is None and c1.node() == repo.dirstate.p1():

239

return repo.dirstate.copies(), {}, {}, {}

239

return repo.dirstate.copies(), {}, {}, {}

240

241

limit = _findlimit(repo, c1.rev(), c2.rev())

241

limit = _findlimit(repo, c1.rev(), c2.rev())

242

if limit is None:

242

if limit is None:

243

# no common ancestor, no copies

243

# no common ancestor, no copies

244

return {}, {}, {}, {}

244

return {}, {}, {}, {}

245

m1 = c1.manifest()

245

m1 = c1.manifest()

246

m2 = c2.manifest()

246

m2 = c2.manifest()

247

ma = ca.manifest()

247

ma = ca.manifest()

248

249

def makectx(f, n):

249

def makectx(f, n):

250

if len(n) != 20: # in a working context?

250

if len(n) != 20: # in a working context?

251

if c1.rev() is None:

251

if c1.rev() is None:

252

return c1.filectx(f)

252

return c1.filectx(f)

253

return c2.filectx(f)

253

return c2.filectx(f)

254

return repo.filectx(f, fileid=n)

254

return repo.filectx(f, fileid=n)

255

256

ctx = util.lrucachefunc(makectx)

256

ctx = util.lrucachefunc(makectx)

257

copy = {}

257

copy = {}

258

movewithdir = {}

258

movewithdir = {}

259

fullcopy = {}

259

fullcopy = {}

260

diverge = {}

260

diverge = {}

261

262

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

262

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

263

264

u1 = _nonoverlap(m1, m2, ma)

264

u1 = _nonoverlap(m1, m2, ma)

265

u2 = _nonoverlap(m2, m1, ma)

265

u2 = _nonoverlap(m2, m1, ma)

266

267

if u1:

267

if u1:

268

repo.ui.debug(" unmatched files in local:\n %s\n"

268

repo.ui.debug(" unmatched files in local:\n %s\n"

269

% "\n ".join(u1))

269

% "\n ".join(u1))

270

if u2:

270

if u2:

271

repo.ui.debug(" unmatched files in other:\n %s\n"

271

repo.ui.debug(" unmatched files in other:\n %s\n"

272

% "\n ".join(u2))

272

% "\n ".join(u2))

273

274

for f in u1:

274

for f in u1:

275

checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)

275

checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)

276

277

for f in u2:

277

for f in u2:

278

checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)

278

checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)

279

280

renamedelete = {}

280

renamedelete = {}

281

renamedelete2 = set()

281

renamedelete2 = set()

282

diverge2 = set()

282

diverge2 = set()

283

for of, fl in diverge.items():

283

for of, fl in diverge.items():

284

if len(fl) == 1 or of in c1 or of in c2:

284

if len(fl) == 1 or of in c1 or of in c2:

285

del diverge[of] # not actually divergent, or not a rename

285

del diverge[of] # not actually divergent, or not a rename

286

if of not in c1 and of not in c2:

286

if of not in c1 and of not in c2:

287

# renamed on one side, deleted on the other side, but filter

287

# renamed on one side, deleted on the other side, but filter

288

# out files that have been renamed and then deleted

288

# out files that have been renamed and then deleted

289

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

289

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

290

renamedelete2.update(fl) # reverse map for below

290

renamedelete2.update(fl) # reverse map for below

291

else:

291

else:

292

diverge2.update(fl) # reverse map for below

292

diverge2.update(fl) # reverse map for below

293

294

bothnew = sorted([d for d in m1 if d in m2 and d not in ma])

294

bothnew = sorted([d for d in m1 if d in m2 and d not in ma])

295

if bothnew:

295

if bothnew:

296

repo.ui.debug(" unmatched files new in both:\n %s\n"

296

repo.ui.debug(" unmatched files new in both:\n %s\n"

297

% "\n ".join(bothnew))

297

% "\n ".join(bothnew))

298

bothdiverge, _copy, _fullcopy = {}, {}, {}

298

bothdiverge, _copy, _fullcopy = {}, {}, {}

299

for f in bothnew:

299

for f in bothnew:

300

checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)

300

checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)

301

checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)

301

checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)

302

for of, fl in bothdiverge.items():

302

for of, fl in bothdiverge.items():

303

if len(fl) == 2 and fl[0] == fl[1]:

303

if len(fl) == 2 and fl[0] == fl[1]:

304

copy[fl[0]] = of # not actually divergent, just matching renames

304

copy[fl[0]] = of # not actually divergent, just matching renames

305

306

if fullcopy and repo.ui.debugflag:

306

if fullcopy and repo.ui.debugflag:

307

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

307

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

308

"% = renamed and deleted):\n")

308

"% = renamed and deleted):\n")

309

for f in sorted(fullcopy):

309

for f in sorted(fullcopy):

310

note = ""

310

note = ""

311

if f in copy:

311

if f in copy:

312

note += "*"

312

note += "*"

313

if f in diverge2:

313

if f in diverge2:

314

note += "!"

314

note += "!"

315

if f in renamedelete2:

315

if f in renamedelete2:

316

note += "%"

316

note += "%"

317

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

317

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

318

note))

318

note))

319

del diverge2

319

del diverge2

320

321

if not fullcopy:

321

if not fullcopy:

322

return copy, movewithdir, diverge, renamedelete

322

return copy, movewithdir, diverge, renamedelete

323

324

repo.ui.debug(" checking for directory renames\n")

324

repo.ui.debug(" checking for directory renames\n")

325

326

# generate a directory move map

326

# generate a directory move map

327

d1, d2 = c1.dirs(), c2.dirs()

327

d1, d2 = c1.dirs(), c2.dirs()

328

d1.addpath('/')

328

d1.addpath('/')

329

d2.addpath('/')

329

d2.addpath('/')

330

invalid = set()

330

invalid = set()

331

dirmove = {}

331

dirmove = {}

332

333

# examine each file copy for a potential directory move, which is

333

# examine each file copy for a potential directory move, which is

334

# when all the files in a directory are moved to a new directory

334

# when all the files in a directory are moved to a new directory

335

for dst, src in fullcopy.iteritems():

335

for dst, src in fullcopy.iteritems():

336

dsrc, ddst = _dirname(src), _dirname(dst)

336

dsrc, ddst = _dirname(src), _dirname(dst)

337

if dsrc in invalid:

337

if dsrc in invalid:

338

# already seen to be uninteresting

338

# already seen to be uninteresting

339

continue

339

continue

340

elif dsrc in d1 and ddst in d1:

340

elif dsrc in d1 and ddst in d1:

341

# directory wasn't entirely moved locally

341

# directory wasn't entirely moved locally

342

invalid.add(dsrc)

342

invalid.add(dsrc)

343

elif dsrc in d2 and ddst in d2:

343

elif dsrc in d2 and ddst in d2:

344

# directory wasn't entirely moved remotely

344

# directory wasn't entirely moved remotely

345

invalid.add(dsrc)

345

invalid.add(dsrc)

346

elif dsrc in dirmove and dirmove[dsrc] != ddst:

346

elif dsrc in dirmove and dirmove[dsrc] != ddst:

347

# files from the same directory moved to two different places

347

# files from the same directory moved to two different places

348

invalid.add(dsrc)

348

invalid.add(dsrc)

349

else:

349

else:

350

# looks good so far

350

# looks good so far

351

dirmove[dsrc + "/"] = ddst + "/"

351

dirmove[dsrc + "/"] = ddst + "/"

352

353

for i in invalid:

353

for i in invalid:

354

if i in dirmove:

354

if i in dirmove:

355

del dirmove[i]

355

del dirmove[i]

356

del d1, d2, invalid

356

del d1, d2, invalid

357

358

if not dirmove:

358

if not dirmove:

359

return copy, movewithdir, diverge, renamedelete

359

return copy, movewithdir, diverge, renamedelete

360

361

for d in dirmove:

361

for d in dirmove:

362

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

362

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

363

(d, dirmove[d]))

363

(d, dirmove[d]))

364

365

# check unaccounted nonoverlapping files against directory moves

365

# check unaccounted nonoverlapping files against directory moves

366

for f in u1 + u2:

366

for f in u1 + u2:

367

if f not in fullcopy:

367

if f not in fullcopy:

368

for d in dirmove:

368

for d in dirmove:

369

if f.startswith(d):

369

if f.startswith(d):

370

# new file added in a directory that was moved, move it

370

# new file added in a directory that was moved, move it

371

df = dirmove[d] + f[len(d):]

371

df = dirmove[d] + f[len(d):]

372

if df not in copy:

372

if df not in copy:

373

movewithdir[f] = df

373

movewithdir[f] = df

374

repo.ui.debug((" pending file src: '%s' -> "

374

repo.ui.debug((" pending file src: '%s' -> "

375

"dst: '%s'\n") % (f, df))

375

"dst: '%s'\n") % (f, df))

376

break

376

break

377

378

return copy, movewithdir, diverge, renamedelete

378

return copy, movewithdir, diverge, renamedelete

379

380

def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):

380

def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):

381

"""

381

"""

382

check possible copies of f from m1 to m2

382

check possible copies of f from m1 to m2

383

384

ctx = function accepting (filename, node) that returns a filectx.

384

ctx = function accepting (filename, node) that returns a filectx.

385

f = the filename to check

385

f = the filename to check

386

m1 = the source manifest

386

m1 = the source manifest

387

m2 = the destination manifest

387

m2 = the destination manifest

388

ca = the changectx of the common ancestor

388

ca = the changectx of the common ancestor

389

limit = the rev number to not search beyond

389

limit = the rev number to not search beyond

390

diverge = record all diverges in this dict

390

diverge = record all diverges in this dict

391

copy = record all non-divergent copies in this dict

391

copy = record all non-divergent copies in this dict

392

fullcopy = record all copies in this dict

392

fullcopy = record all copies in this dict

393

"""

393

"""

394

395

ma = ca.manifest()

395

ma = ca.manifest()

396

397

def _related(f1, f2, limit):

397

def _related(f1, f2, limit):

398

# Walk back to common ancestor to see if the two files originate

398

# Walk back to common ancestor to see if the two files originate

399

# from the same file. Since workingfilectx's rev() is None it messes

399

# from the same file. Since workingfilectx's rev() is None it messes

400

# up the integer comparison logic, hence the pre-step check for

400

# up the integer comparison logic, hence the pre-step check for

401

# None (f1 and f2 can only be workingfilectx's initially).

401

# None (f1 and f2 can only be workingfilectx's initially).

402

403

if f1 == f2:

403

if f1 == f2:

404

return f1 # a match

404

return f1 # a match

405

406

g1, g2 = f1.ancestors(), f2.ancestors()

406

g1, g2 = f1.ancestors(), f2.ancestors()

407

try:

407

try:

408

f1r, f2r = f1.rev(), f2.rev()

408

f1r, f2r = f1.rev(), f2.rev()

409

410

if f1r is None:

410

if f1r is None:

411

f1 = g1.next()

411

f1 = g1.next()

412

if f2r is None:

412

if f2r is None:

413

f2 = g2.next()

413

f2 = g2.next()

414

415

while True:

415

while True:

416

f1r, f2r = f1.rev(), f2.rev()

416

f1r, f2r = f1.rev(), f2.rev()

417

if f1r > f2r:

417

if f1r > f2r:

418

f1 = g1.next()

418

f1 = g1.next()

419

elif f2r > f1r:

419

elif f2r > f1r:

420

f2 = g2.next()

420

f2 = g2.next()

421

elif f1 == f2:

421

elif f1 == f2:

422

return f1 # a match

422

return f1 # a match

423

elif f1r == f2r or f1r < limit or f2r < limit:

423

elif f1r == f2r or f1r < limit or f2r < limit:

424

return False # copy no longer relevant

424

return False # copy no longer relevant

425

except StopIteration:

425

except StopIteration:

426

return False

426

return False

427

428

of = None

428

of = None

429

seen = set([f])

429

seen = set([f])

430

for oc in ctx(f, m1[f]).ancestors():

430

for oc in ctx(f, m1[f]).ancestors():

431

ocr = oc.rev()

431

ocr = oc.rev()

432

of = oc.path()

432

of = oc.path()

433

if of in seen:

433

if of in seen:

434

# check limit late - grab last rename before

434

# check limit late - grab last rename before

435

if ocr < limit:

435

if ocr < limit:

436

break

436

break

437

continue

437

continue

438

seen.add(of)

438

seen.add(of)

439

440

fullcopy[f] = of # remember for dir rename detection

440

fullcopy[f] = of # remember for dir rename detection

441

if of not in m2:

441

if of not in m2:

442

continue # no match, keep looking

442

continue # no match, keep looking

443

if m2[of] == ma.get(of):

443

if m2[of] == ma.get(of):

444

break # no merge needed, quit early

444

break # no merge needed, quit early

445

c2 = ctx(of, m2[of])

445

c2 = ctx(of, m2[of])

446

cr = _related(oc, c2, ca.rev())

446

cr = _related(oc, c2, ca.rev())

447

if cr and (of == f or of == c2.path()): # non-divergent

447

if cr and (of == f or of == c2.path()): # non-divergent

448

copy[f] = of

448

copy[f] = of

449

of = None

449

of = None

450

break

450

break

451

452

if of in ma:

452

if of in ma:

453

diverge.setdefault(of, []).append(f)

453

diverge.setdefault(of, []).append(f)

454

455

def duplicatecopies(repo, rev, fromrev, skiprev=None):

455

def duplicatecopies(repo, rev, fromrev, skiprev=None):

456

'''reproduce copies from fromrev to rev in the dirstate

456

'''reproduce copies from fromrev to rev in the dirstate

457

458

If skiprev is specified, it's a revision that should be used to

458

If skiprev is specified, it's a revision that should be used to

459

filter copy records. Any copies that occur between fromrev and

459

filter copy records. Any copies that occur between fromrev and

460

skiprev will not be duplicated, even if they appear in the set of

460

skiprev will not be duplicated, even if they appear in the set of

461

copies between fromrev and rev.

461

copies between fromrev and rev.

462

'''

462

'''

463

exclude = {}

463

exclude = {}

464

if skiprev is not None:

464

if skiprev is not None:

465

exclude = pathcopies(repo[fromrev], repo[skiprev])

465

exclude = pathcopies(repo[fromrev], repo[skiprev])

466

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

466

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

467

# copies.pathcopies returns backward renames, so dst might not

467

# copies.pathcopies returns backward renames, so dst might not

468

# actually be in the dirstate

468

# actually be in the dirstate

469

if dst in exclude:

469

if dst in exclude:

470

continue

470

continue

471

if repo.dirstate[dst] in "nma":

471

if repo.dirstate[dst] in "nma":

472

repo.dirstate.copy(src, dst)

472

repo.dirstate.copy(src, dst)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             import util
             import heapq
             def _nonoverlap(d1, d2, d3):
                 "Return list of elements in d1 not in d2 or d3"
                 return sorted([d for d in d1 if d not in d3 and d not in d2])
             def _dirname(f):
                 s = f.rfind("/")
                 if s == -1:
                     return ""
                 return f[:s]
             def _findlimit(repo, a, b):
                 """
                 Find the last revision that needs to be checked to ensure that a full
                 transitive closure for file copies can be properly calculated.
                 Generally, this means finding the earliest revision number that's an
                 ancestor of a or b but not both, except when a or b is a direct descendent
                 of the other, in which case we can return the minimum revnum of a and b.
                 None if no such revision exists.
                 """
                 # basic idea:
                 # - mark a and b with different sides
                 # - if a parent's children are all on the same side, the parent is
                 #   on that side, otherwise it is on no side
                 # - walk the graph in topological order with the help of a heap;
                 #   - add unseen parents to side map
                 #   - clear side of any parent that has children on different sides
                 #   - track number of interesting revs that might still be on a side
                 #   - track the lowest interesting rev seen
                 #   - quit when interesting revs is zero
                 cl = repo.changelog
                 working = len(cl) # pseudo rev for the working directory
                 if a is None:
                     a = working
                 if b is None:
                     b = working
                 side = {a: -1, b: 1}
                 visit = [-a, -b]
                 heapq.heapify(visit)
                 interesting = len(visit)
                 hascommonancestor = False
                 limit = working
                 while interesting:
                     r = -heapq.heappop(visit)
                     if r == working:
                         parents = [cl.rev(p) for p in repo.dirstate.parents()]
                     else:
                         parents = cl.parentrevs(r)
                     for p in parents:
                         if p < 0:
                             continue
                         if p not in side:
                             # first time we see p; add it to visit
                             side[p] = side[r]
                             if side[p]:
                                 interesting += 1
                             heapq.heappush(visit, -p)
                         elif side[p] and side[p] != side[r]:
                             # p was interesting but now we know better
                             side[p] = 0
                             interesting -= 1
                             hascommonancestor = True
                     if side[r]:
                         limit = r # lowest rev visited
                         interesting -= 1
                 if not hascommonancestor:
                     return None
                 # Consider the following flow (see test-commit-amend.t under issue4405):
                 # 1/ File 'a0' committed
                 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
                 # 3/ Move back to first commit
                 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
                 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
                 #
                 # During the amend in step five, we will be in this state:
                 #
                 # @  3 temporary amend commit for a1-amend
                 # |
                 # o  2 a1-amend
                 # |
                 # | o  1 a1
                 # |/
                 # o  0 a0
                 #
                 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
                 # yet the filelog has the copy information in rev 1 and we will not look
                 # back far enough unless we also look at the a and b as candidates.
                 # This only occurs when a is a descendent of b or visa-versa.
                 return min(limit, a, b)
             def _chain(src, dst, a, b):
                 '''chain two sets of copies a->b'''
                 t = a.copy()
                 for k, v in b.iteritems():
                     if v in t:
                         # found a chain
                         if t[v] != k:
                             # file wasn't renamed back to itself
                             t[k] = t[v]
                         if v not in dst:
                             # chain was a rename, not a copy
                             del t[v]
                     if v in src:
                         # file is a copy of an existing file
                         t[k] = v
                 # remove criss-crossed copies
                 for k, v in t.items():
                     if k in src and v in dst:
                         del t[k]
                 return t
             def _tracefile(fctx, am, limit=-1):
                 '''return file context that is the ancestor of fctx present in ancestor
                 manifest am, stopping after the first ancestor lower than limit'''
                 for f in fctx.ancestors():
                     if am.get(f.path(), None) == f.filenode():
                         return f
-                    if f.rev() < limit:
+                    if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
                         return None
             def _dirstatecopies(d):
                 ds = d._repo.dirstate
                 c = ds.copies().copy()
                 for k in c.keys():
                     if ds[k] not in 'anm':
                         del c[k]
                 return c
             def _forwardcopies(a, b):
                 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
                 # check for working copy
                 w = None
                 if b.rev() is None:
                     w = b
                     b = w.p1()
                     if a == b:
                         # short-circuit to avoid issues with merge states
                         return _dirstatecopies(w)
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 limit = _findlimit(a._repo, a.rev(), b.rev())
                 if limit is None:
                     limit = -1
                 am = a.manifest()
                 # find where new files came from
                 # we currently don't try to find where old files went, too expensive
                 # this means we can miss a case like 'hg rm b; hg cp a b'
                 cm = {}
                 missing = set(b.manifest().iterkeys())
                 missing.difference_update(a.manifest().iterkeys())
                 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
                 for f in missing:
                     fctx = b[f]
                     fctx._ancestrycontext = ancestrycontext
                     ofctx = _tracefile(fctx, am, limit)
                     if ofctx:
                         cm[f] = ofctx.path()
                 # combine copies from dirstate if necessary
                 if w is not None:
                     cm = _chain(a, w, cm, _dirstatecopies(w))
                 return cm
             def _backwardrenames(a, b):
                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
                 f = _forwardcopies(b, a)
                 r = {}
                 for k, v in sorted(f.iteritems()):
                     # remove copies
                     if v in a:
                         continue
                     r[v] = k
                 return r
             def pathcopies(x, y):
                 '''find {dst@y: src@x} copy mapping for directed compare'''
                 if x == y or not x or not y:
                     return {}
                 a = y.ancestor(x)
                 if a == x:
                     return _forwardcopies(x, y)
                 if a == y:
                     return _backwardrenames(x, y)
                 return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))
             def mergecopies(repo, c1, c2, ca):
                 """
                 Find moves and copies between context c1 and c2 that are relevant
                 for merging.
                 Returns four dicts: "copy", "movewithdir", "diverge", and
                 "renamedelete".
                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.
                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.
                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.
                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return {}, {}, {}, {}
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return repo.dirstate.copies(), {}, {}, {}
                 limit = _findlimit(repo, c1.rev(), c2.rev())
                 if limit is None:
                     # no common ancestor, no copies
                     return {}, {}, {}, {}
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 ma = ca.manifest()
                 def makectx(f, n):
                     if len(n) != 20: # in a working context?
                         if c1.rev() is None:
                             return c1.filectx(f)
                         return c2.filectx(f)
                     return repo.filectx(f, fileid=n)
                 ctx = util.lrucachefunc(makectx)
                 copy = {}
                 movewithdir = {}
                 fullcopy = {}
                 diverge = {}
                 repo.ui.debug("  searching for copies back to rev %d\n" % limit)
                 u1 = _nonoverlap(m1, m2, ma)
                 u2 = _nonoverlap(m2, m1, ma)
                 if u1:
                     repo.ui.debug("  unmatched files in local:\n   %s\n"
                                   % "\n   ".join(u1))
                 if u2:
                     repo.ui.debug("  unmatched files in other:\n   %s\n"
                                   % "\n   ".join(u2))
                 for f in u1:
                     checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)
                 for f in u2:
                     checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)
                 renamedelete = {}
                 renamedelete2 = set()
                 diverge2 = set()
                 for of, fl in diverge.items():
                     if len(fl) == 1 or of in c1 or of in c2:
                         del diverge[of] # not actually divergent, or not a rename
                         if of not in c1 and of not in c2:
                             # renamed on one side, deleted on the other side, but filter
                             # out files that have been renamed and then deleted
                             renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                             renamedelete2.update(fl) # reverse map for below
                     else:
                         diverge2.update(fl) # reverse map for below
                 bothnew = sorted([d for d in m1 if d in m2 and d not in ma])
                 if bothnew:
                     repo.ui.debug("  unmatched files new in both:\n   %s\n"
                                   % "\n   ".join(bothnew))
                 bothdiverge, _copy, _fullcopy = {}, {}, {}
                 for f in bothnew:
                     checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
                     checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
                 for of, fl in bothdiverge.items():
                     if len(fl) == 2 and fl[0] == fl[1]:
                         copy[fl[0]] = of # not actually divergent, just matching renames
                 if fullcopy and repo.ui.debugflag:
                     repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                                   "% = renamed and deleted):\n")
                     for f in sorted(fullcopy):
                         note = ""
                         if f in copy:
                             note += "*"
                         if f in diverge2:
                             note += "!"
                         if f in renamedelete2:
                             note += "%"
                         repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                                           note))
                 del diverge2
                 if not fullcopy:
                     return copy, movewithdir, diverge, renamedelete
                 repo.ui.debug("  checking for directory renames\n")
                 # generate a directory move map
                 d1, d2 = c1.dirs(), c2.dirs()
                 d1.addpath('/')
                 d2.addpath('/')
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in fullcopy.iteritems():
                     dsrc, ddst = _dirname(src), _dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif dsrc in d1 and ddst in d1:
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc)
                     elif dsrc in d2 and ddst in d2:
                         # directory wasn't entirely moved remotely
                         invalid.add(dsrc)
                     elif dsrc in dirmove and dirmove[dsrc] != ddst:
                         # files from the same directory moved to two different places
                         invalid.add(dsrc)
                     else:
                         # looks good so far
                         dirmove[dsrc + "/"] = ddst + "/"
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del d1, d2, invalid
                 if not dirmove:
                     return copy, movewithdir, diverge, renamedelete
                 for d in dirmove:
                     repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                                   (d, dirmove[d]))
                 # check unaccounted nonoverlapping files against directory moves
                 for f in u1 + u2:
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d):]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(("   pending file src: '%s' -> "
                                                    "dst: '%s'\n") % (f, df))
                                 break
                 return copy, movewithdir, diverge, renamedelete
             def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
                 """
                 check possible copies of f from m1 to m2
                 ctx = function accepting (filename, node) that returns a filectx.
                 f = the filename to check
                 m1 = the source manifest
                 m2 = the destination manifest
                 ca = the changectx of the common ancestor
                 limit = the rev number to not search beyond
                 diverge = record all diverges in this dict
                 copy = record all non-divergent copies in this dict
                 fullcopy = record all copies in this dict
                 """
                 ma = ca.manifest()
                 def _related(f1, f2, limit):
                     # Walk back to common ancestor to see if the two files originate
                     # from the same file. Since workingfilectx's rev() is None it messes
                     # up the integer comparison logic, hence the pre-step check for
                     # None (f1 and f2 can only be workingfilectx's initially).
                     if f1 == f2:
                         return f1 # a match
                     g1, g2 = f1.ancestors(), f2.ancestors()
                     try:
                         f1r, f2r = f1.rev(), f2.rev()
                         if f1r is None:
                             f1 = g1.next()
                         if f2r is None:
                             f2 = g2.next()
                         while True:
                             f1r, f2r = f1.rev(), f2.rev()
                             if f1r > f2r:
                                 f1 = g1.next()
                             elif f2r > f1r:
                                 f2 = g2.next()
                             elif f1 == f2:
                                 return f1 # a match
                             elif f1r == f2r or f1r < limit or f2r < limit:
                                 return False # copy no longer relevant
                     except StopIteration:
                         return False
                 of = None
                 seen = set([f])
                 for oc in ctx(f, m1[f]).ancestors():
                     ocr = oc.rev()
                     of = oc.path()
                     if of in seen:
                         # check limit late - grab last rename before
                         if ocr < limit:
                             break
                         continue
                     seen.add(of)
                     fullcopy[f] = of # remember for dir rename detection
                     if of not in m2:
                         continue # no match, keep looking
                     if m2[of] == ma.get(of):
                         break # no merge needed, quit early
                     c2 = ctx(of, m2[of])
                     cr = _related(oc, c2, ca.rev())
                     if cr and (of == f or of == c2.path()): # non-divergent
                         copy[f] = of
                         of = None
                         break
                 if of in ma:
                     diverge.setdefault(of, []).append(f)
             def duplicatecopies(repo, rev, fromrev, skiprev=None):
                 '''reproduce copies from fromrev to rev in the dirstate
                 If skiprev is specified, it's a revision that should be used to
                 filter copy records. Any copies that occur between fromrev and
                 skiprev will not be duplicated, even if they appear in the set of
                 copies between fromrev and rev.
                 '''
                 exclude = {}
                 if skiprev is not None:
                     exclude = pathcopies(repo[fromrev], repo[skiprev])
                 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
                     # copies.pathcopies returns backward renames, so dst might not
                     # actually be in the dirstate
                     if dst in exclude:
                         continue
                     if repo.dirstate[dst] in "nma":
                         repo.dirstate.copy(src, dst)