upstream/mercurial-mirror Commit - r24011:d7d08337

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

import util

8

import util

9

import heapq

9

import heapq

10

11

def _nonoverlap(d1, d2, d3):

11

def _nonoverlap(d1, d2, d3):

12

"Return list of elements in d1 not in d2 or d3"

12

"Return list of elements in d1 not in d2 or d3"

13

return sorted([d for d in d1 if d not in d3 and d not in d2])

13

return sorted([d for d in d1 if d not in d3 and d not in d2])

14

15

def _dirname(f):

15

def _dirname(f):

16

s = f.rfind("/")

16

s = f.rfind("/")

17

if s == -1:

17

if s == -1:

18

return ""

18

return ""

19

return f[:s]

19

return f[:s]

20

21

def _findlimit(repo, a, b):

21

def _findlimit(repo, a, b):

22

"""

22

"""

23

Find the last revision that needs to be checked to ensure that a full

23

Find the last revision that needs to be checked to ensure that a full

24

transitive closure for file copies can be properly calculated.

24

transitive closure for file copies can be properly calculated.

25

Generally, this means finding the earliest revision number that's an

25

Generally, this means finding the earliest revision number that's an

26

ancestor of a or b but not both, except when a or b is a direct descendent

26

ancestor of a or b but not both, except when a or b is a direct descendent

27

of the other, in which case we can return the minimum revnum of a and b.

27

of the other, in which case we can return the minimum revnum of a and b.

28

None if no such revision exists.

28

None if no such revision exists.

29

"""

29

"""

30

31

# basic idea:

31

# basic idea:

32

# - mark a and b with different sides

32

# - mark a and b with different sides

33

# - if a parent's children are all on the same side, the parent is

33

# - if a parent's children are all on the same side, the parent is

34

# on that side, otherwise it is on no side

34

# on that side, otherwise it is on no side

35

# - walk the graph in topological order with the help of a heap;

35

# - walk the graph in topological order with the help of a heap;

36

# - add unseen parents to side map

36

# - add unseen parents to side map

37

# - clear side of any parent that has children on different sides

37

# - clear side of any parent that has children on different sides

38

# - track number of interesting revs that might still be on a side

38

# - track number of interesting revs that might still be on a side

39

# - track the lowest interesting rev seen

39

# - track the lowest interesting rev seen

40

# - quit when interesting revs is zero

40

# - quit when interesting revs is zero

41

42

cl = repo.changelog

42

cl = repo.changelog

43

working = len(cl) # pseudo rev for the working directory

43

working = len(cl) # pseudo rev for the working directory

44

if a is None:

44

if a is None:

45

a = working

45

a = working

46

if b is None:

46

if b is None:

47

b = working

47

b = working

48

49

side = {a: -1, b: 1}

49

side = {a: -1, b: 1}

50

visit = [-a, -b]

50

visit = [-a, -b]

51

heapq.heapify(visit)

51

heapq.heapify(visit)

52

interesting = len(visit)

52

interesting = len(visit)

53

hascommonancestor = False

53

hascommonancestor = False

54

limit = working

54

limit = working

55

56

while interesting:

56

while interesting:

57

r = -heapq.heappop(visit)

57

r = -heapq.heappop(visit)

58

if r == working:

58

if r == working:

59

parents = [cl.rev(p) for p in repo.dirstate.parents()]

59

parents = [cl.rev(p) for p in repo.dirstate.parents()]

60

else:

60

else:

61

parents = cl.parentrevs(r)

61

parents = cl.parentrevs(r)

62

for p in parents:

62

for p in parents:

63

if p < 0:

63

if p < 0:

64

continue

64

continue

65

if p not in side:

65

if p not in side:

66

# first time we see p; add it to visit

66

# first time we see p; add it to visit

67

side[p] = side[r]

67

side[p] = side[r]

68

if side[p]:

68

if side[p]:

69

interesting += 1

69

interesting += 1

70

heapq.heappush(visit, -p)

70

heapq.heappush(visit, -p)

71

elif side[p] and side[p] != side[r]:

71

elif side[p] and side[p] != side[r]:

72

# p was interesting but now we know better

72

# p was interesting but now we know better

73

side[p] = 0

73

side[p] = 0

74

interesting -= 1

74

interesting -= 1

75

hascommonancestor = True

75

hascommonancestor = True

76

if side[r]:

76

if side[r]:

77

limit = r # lowest rev visited

77

limit = r # lowest rev visited

78

interesting -= 1

78

interesting -= 1

79

80

if not hascommonancestor:

80

if not hascommonancestor:

81

return None

81

return None

82

83

# Consider the following flow (see test-commit-amend.t under issue4405):

83

# Consider the following flow (see test-commit-amend.t under issue4405):

84

# 1/ File 'a0' committed

84

# 1/ File 'a0' committed

85

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

85

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

86

# 3/ Move back to first commit

86

# 3/ Move back to first commit

87

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

87

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

88

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

88

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

89

#

89

#

90

# During the amend in step five, we will be in this state:

90

# During the amend in step five, we will be in this state:

91

#

91

#

92

# @ 3 temporary amend commit for a1-amend

92

# @ 3 temporary amend commit for a1-amend

93

# |

93

# |

94

# o 2 a1-amend

94

# o 2 a1-amend

95

# |

95

# |

96

# | o 1 a1

96

# | o 1 a1

97

# |/

97

# |/

98

# o 0 a0

98

# o 0 a0

99

#

99

#

100

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

100

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

101

# yet the filelog has the copy information in rev 1 and we will not look

101

# yet the filelog has the copy information in rev 1 and we will not look

102

# back far enough unless we also look at the a and b as candidates.

102

# back far enough unless we also look at the a and b as candidates.

103

# This only occurs when a is a descendent of b or visa-versa.

103

# This only occurs when a is a descendent of b or visa-versa.

104

return min(limit, a, b)

104

return min(limit, a, b)

105

106

def _chain(src, dst, a, b):

106

def _chain(src, dst, a, b):

107

'''chain two sets of copies a->b'''

107

'''chain two sets of copies a->b'''

108

t = a.copy()

108

t = a.copy()

109

for k, v in b.iteritems():

109

for k, v in b.iteritems():

110

if v in t:

110

if v in t:

111

# found a chain

111

# found a chain

112

if t[v] != k:

112

if t[v] != k:

113

# file wasn't renamed back to itself

113

# file wasn't renamed back to itself

114

t[k] = t[v]

114

t[k] = t[v]

115

if v not in dst:

115

if v not in dst:

116

# chain was a rename, not a copy

116

# chain was a rename, not a copy

117

del t[v]

117

del t[v]

118

if v in src:

118

if v in src:

119

# file is a copy of an existing file

119

# file is a copy of an existing file

120

t[k] = v

120

t[k] = v

121

122

# remove criss-crossed copies

122

# remove criss-crossed copies

123

for k, v in t.items():

123

for k, v in t.items():

124

if k in src and v in dst:

124

if k in src and v in dst:

125

del t[k]

125

del t[k]

126

127

return t

127

return t

128

129

def _tracefile(fctx, am, limit=-1):

129

def _tracefile(fctx, am, limit=-1):

130

'''return file context that is the ancestor of fctx present in ancestor

130

'''return file context that is the ancestor of fctx present in ancestor

131

manifest am, stopping after the first ancestor lower than limit'''

131

manifest am, stopping after the first ancestor lower than limit'''

132

133

for f in fctx.ancestors():

133

for f in fctx.ancestors():

134

if am.get(f.path(), None) == f.filenode():

134

if am.get(f.path(), None) == f.filenode():

135

return f

135

return f

136

if limit >= 0 and f.linkrev() < limit and f.rev() < limit:

136

if limit >= 0 and f.linkrev() < limit and f.rev() < limit:

137

return None

137

return None

138

139

def _dirstatecopies(d):

139

def _dirstatecopies(d):

140

ds = d._repo.dirstate

140

ds = d._repo.dirstate

141

c = ds.copies().copy()

141

c = ds.copies().copy()

142

for k in c.keys():

142

for k in c.keys():

143

if ds[k] not in 'anm':

143

if ds[k] not in 'anm':

144

del c[k]

144

del c[k]

145

return c

145

return c

146

147

def _computeforwardmissing(a, b):

148

"""Computes which files are in b but not a.

149

This is its own function so extensions can easily wrap this call to see what

150

files _forwardcopies is about to process.

151

"""

152

missing = set(b.manifest().iterkeys())

153

missing.difference_update(a.manifest().iterkeys())

154

return missing

155

147

def _forwardcopies(a, b):

156

def _forwardcopies(a, b):

148

'''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

157

'''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

149

158

150

# check for working copy

159

# check for working copy

151

w = None

160

w = None

152

if b.rev() is None:

161

if b.rev() is None:

153

w = b

162

w = b

154

b = w.p1()

163

b = w.p1()

155

if a == b:

164

if a == b:

156

# short-circuit to avoid issues with merge states

165

# short-circuit to avoid issues with merge states

157

return _dirstatecopies(w)

166

return _dirstatecopies(w)

158

167

159

# files might have to be traced back to the fctx parent of the last

168

# files might have to be traced back to the fctx parent of the last

160

# one-side-only changeset, but not further back than that

169

# one-side-only changeset, but not further back than that

161

limit = _findlimit(a._repo, a.rev(), b.rev())

170

limit = _findlimit(a._repo, a.rev(), b.rev())

162

if limit is None:

171

if limit is None:

163

limit = -1

172

limit = -1

164

am = a.manifest()

173

am = a.manifest()

165

174

166

# find where new files came from

175

# find where new files came from

167

# we currently don't try to find where old files went, too expensive

176

# we currently don't try to find where old files went, too expensive

168

# this means we can miss a case like 'hg rm b; hg cp a b'

177

# this means we can miss a case like 'hg rm b; hg cp a b'

169

cm = {}

178

cm = {}

170

missing = set(b.manifest().iterkeys())

179

missing = _computeforwardmissing(a, b)

171

missing.difference_update(a.manifest().iterkeys())

172

173

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

180

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

174

for f in missing:

181

for f in missing:

175

fctx = b[f]

182

fctx = b[f]

176

fctx._ancestrycontext = ancestrycontext

183

fctx._ancestrycontext = ancestrycontext

177

ofctx = _tracefile(fctx, am, limit)

184

ofctx = _tracefile(fctx, am, limit)

178

if ofctx:

185

if ofctx:

179

cm[f] = ofctx.path()

186

cm[f] = ofctx.path()

180

187

181

# combine copies from dirstate if necessary

188

# combine copies from dirstate if necessary

182

if w is not None:

189

if w is not None:

183

cm = _chain(a, w, cm, _dirstatecopies(w))

190

cm = _chain(a, w, cm, _dirstatecopies(w))

184

191

185

return cm

192

return cm

186

193

187

def _backwardrenames(a, b):

194

def _backwardrenames(a, b):

188

# Even though we're not taking copies into account, 1:n rename situations

195

# Even though we're not taking copies into account, 1:n rename situations

189

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

196

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

190

# arbitrarily pick one of the renames.

197

# arbitrarily pick one of the renames.

191

f = _forwardcopies(b, a)

198

f = _forwardcopies(b, a)

192

r = {}

199

r = {}

193

for k, v in sorted(f.iteritems()):

200

for k, v in sorted(f.iteritems()):

194

# remove copies

201

# remove copies

195

if v in a:

202

if v in a:

196

continue

203

continue

197

r[v] = k

204

r[v] = k

198

return r

205

return r

199

206

200

def pathcopies(x, y):

207

def pathcopies(x, y):

201

'''find {dst@y: src@x} copy mapping for directed compare'''

208

'''find {dst@y: src@x} copy mapping for directed compare'''

202

if x == y or not x or not y:

209

if x == y or not x or not y:

203

return {}

210

return {}

204

a = y.ancestor(x)

211

a = y.ancestor(x)

205

if a == x:

212

if a == x:

206

return _forwardcopies(x, y)

213

return _forwardcopies(x, y)

207

if a == y:

214

if a == y:

208

return _backwardrenames(x, y)

215

return _backwardrenames(x, y)

209

return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))

216

return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))

210

217

211

def _computenonoverlap(repo, m1, m2, ma):

218

def _computenonoverlap(repo, m1, m2, ma):

212

"""Computes the files exclusive to m1 and m2.

219

"""Computes the files exclusive to m1 and m2.

213

This is its own function so extensions can easily wrap this call to see what

220

This is its own function so extensions can easily wrap this call to see what

214

files mergecopies is about to process.

221

files mergecopies is about to process.

215

"""

222

"""

216

u1 = _nonoverlap(m1, m2, ma)

223

u1 = _nonoverlap(m1, m2, ma)

217

u2 = _nonoverlap(m2, m1, ma)

224

u2 = _nonoverlap(m2, m1, ma)

218

225

219

if u1:

226

if u1:

220

repo.ui.debug(" unmatched files in local:\n %s\n"

227

repo.ui.debug(" unmatched files in local:\n %s\n"

221

% "\n ".join(u1))

228

% "\n ".join(u1))

222

if u2:

229

if u2:

223

repo.ui.debug(" unmatched files in other:\n %s\n"

230

repo.ui.debug(" unmatched files in other:\n %s\n"

224

% "\n ".join(u2))

231

% "\n ".join(u2))

225

return u1, u2

232

return u1, u2

226

233

227

def mergecopies(repo, c1, c2, ca):

234

def mergecopies(repo, c1, c2, ca):

228

"""

235

"""

229

Find moves and copies between context c1 and c2 that are relevant

236

Find moves and copies between context c1 and c2 that are relevant

230

for merging.

237

for merging.

231

238

232

Returns four dicts: "copy", "movewithdir", "diverge", and

239

Returns four dicts: "copy", "movewithdir", "diverge", and

233

"renamedelete".

240

"renamedelete".

234

241

235

"copy" is a mapping from destination name -> source name,

242

"copy" is a mapping from destination name -> source name,

236

where source is in c1 and destination is in c2 or vice-versa.

243

where source is in c1 and destination is in c2 or vice-versa.

237

244

238

"movewithdir" is a mapping from source name -> destination name,

245

"movewithdir" is a mapping from source name -> destination name,

239

where the file at source present in one context but not the other

246

where the file at source present in one context but not the other

240

needs to be moved to destination by the merge process, because the

247

needs to be moved to destination by the merge process, because the

241

other context moved the directory it is in.

248

other context moved the directory it is in.

242

249

243

"diverge" is a mapping of source name -> list of destination names

250

"diverge" is a mapping of source name -> list of destination names

244

for divergent renames.

251

for divergent renames.

245

252

246

"renamedelete" is a mapping of source name -> list of destination

253

"renamedelete" is a mapping of source name -> list of destination

247

names for files deleted in c1 that were renamed in c2 or vice-versa.

254

names for files deleted in c1 that were renamed in c2 or vice-versa.

248

"""

255

"""

249

# avoid silly behavior for update from empty dir

256

# avoid silly behavior for update from empty dir

250

if not c1 or not c2 or c1 == c2:

257

if not c1 or not c2 or c1 == c2:

251

return {}, {}, {}, {}

258

return {}, {}, {}, {}

252

259

253

# avoid silly behavior for parent -> working dir

260

# avoid silly behavior for parent -> working dir

254

if c2.node() is None and c1.node() == repo.dirstate.p1():

261

if c2.node() is None and c1.node() == repo.dirstate.p1():

255

return repo.dirstate.copies(), {}, {}, {}

262

return repo.dirstate.copies(), {}, {}, {}

256

263

257

limit = _findlimit(repo, c1.rev(), c2.rev())

264

limit = _findlimit(repo, c1.rev(), c2.rev())

258

if limit is None:

265

if limit is None:

259

# no common ancestor, no copies

266

# no common ancestor, no copies

260

return {}, {}, {}, {}

267

return {}, {}, {}, {}

261

m1 = c1.manifest()

268

m1 = c1.manifest()

262

m2 = c2.manifest()

269

m2 = c2.manifest()

263

ma = ca.manifest()

270

ma = ca.manifest()

264

271

265

def makectx(f, n):

272

def makectx(f, n):

266

if len(n) != 20: # in a working context?

273

if len(n) != 20: # in a working context?

267

if c1.rev() is None:

274

if c1.rev() is None:

268

return c1.filectx(f)

275

return c1.filectx(f)

269

return c2.filectx(f)

276

return c2.filectx(f)

270

return repo.filectx(f, fileid=n)

277

return repo.filectx(f, fileid=n)

271

278

272

ctx = util.lrucachefunc(makectx)

279

ctx = util.lrucachefunc(makectx)

273

copy = {}

280

copy = {}

274

movewithdir = {}

281

movewithdir = {}

275

fullcopy = {}

282

fullcopy = {}

276

diverge = {}

283

diverge = {}

277

284

278

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

285

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

279

286

280

u1, u2 = _computenonoverlap(repo, m1, m2, ma)

287

u1, u2 = _computenonoverlap(repo, m1, m2, ma)

281

288

282

for f in u1:

289

for f in u1:

283

checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)

290

checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)

284

291

285

for f in u2:

292

for f in u2:

286

checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)

293

checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)

287

294

288

renamedelete = {}

295

renamedelete = {}

289

renamedelete2 = set()

296

renamedelete2 = set()

290

diverge2 = set()

297

diverge2 = set()

291

for of, fl in diverge.items():

298

for of, fl in diverge.items():

292

if len(fl) == 1 or of in c1 or of in c2:

299

if len(fl) == 1 or of in c1 or of in c2:

293

del diverge[of] # not actually divergent, or not a rename

300

del diverge[of] # not actually divergent, or not a rename

294

if of not in c1 and of not in c2:

301

if of not in c1 and of not in c2:

295

# renamed on one side, deleted on the other side, but filter

302

# renamed on one side, deleted on the other side, but filter

296

# out files that have been renamed and then deleted

303

# out files that have been renamed and then deleted

297

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

304

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

298

renamedelete2.update(fl) # reverse map for below

305

renamedelete2.update(fl) # reverse map for below

299

else:

306

else:

300

diverge2.update(fl) # reverse map for below

307

diverge2.update(fl) # reverse map for below

301

308

302

bothnew = sorted([d for d in m1 if d in m2 and d not in ma])

309

bothnew = sorted([d for d in m1 if d in m2 and d not in ma])

303

if bothnew:

310

if bothnew:

304

repo.ui.debug(" unmatched files new in both:\n %s\n"

311

repo.ui.debug(" unmatched files new in both:\n %s\n"

305

% "\n ".join(bothnew))

312

% "\n ".join(bothnew))

306

bothdiverge, _copy, _fullcopy = {}, {}, {}

313

bothdiverge, _copy, _fullcopy = {}, {}, {}

307

for f in bothnew:

314

for f in bothnew:

308

checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)

315

checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)

309

checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)

316

checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)

310

for of, fl in bothdiverge.items():

317

for of, fl in bothdiverge.items():

311

if len(fl) == 2 and fl[0] == fl[1]:

318

if len(fl) == 2 and fl[0] == fl[1]:

312

copy[fl[0]] = of # not actually divergent, just matching renames

319

copy[fl[0]] = of # not actually divergent, just matching renames

313

320

314

if fullcopy and repo.ui.debugflag:

321

if fullcopy and repo.ui.debugflag:

315

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

322

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

316

"% = renamed and deleted):\n")

323

"% = renamed and deleted):\n")

317

for f in sorted(fullcopy):

324

for f in sorted(fullcopy):

318

note = ""

325

note = ""

319

if f in copy:

326

if f in copy:

320

note += "*"

327

note += "*"

321

if f in diverge2:

328

if f in diverge2:

322

note += "!"

329

note += "!"

323

if f in renamedelete2:

330

if f in renamedelete2:

324

note += "%"

331

note += "%"

325

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

332

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

326

note))

333

note))

327

del diverge2

334

del diverge2

328

335

329

if not fullcopy:

336

if not fullcopy:

330

return copy, movewithdir, diverge, renamedelete

337

return copy, movewithdir, diverge, renamedelete

331

338

332

repo.ui.debug(" checking for directory renames\n")

339

repo.ui.debug(" checking for directory renames\n")

333

340

334

# generate a directory move map

341

# generate a directory move map

335

d1, d2 = c1.dirs(), c2.dirs()

342

d1, d2 = c1.dirs(), c2.dirs()

336

d1.addpath('/')

343

d1.addpath('/')

337

d2.addpath('/')

344

d2.addpath('/')

338

invalid = set()

345

invalid = set()

339

dirmove = {}

346

dirmove = {}

340

347

341

# examine each file copy for a potential directory move, which is

348

# examine each file copy for a potential directory move, which is

342

# when all the files in a directory are moved to a new directory

349

# when all the files in a directory are moved to a new directory

343

for dst, src in fullcopy.iteritems():

350

for dst, src in fullcopy.iteritems():

344

dsrc, ddst = _dirname(src), _dirname(dst)

351

dsrc, ddst = _dirname(src), _dirname(dst)

345

if dsrc in invalid:

352

if dsrc in invalid:

346

# already seen to be uninteresting

353

# already seen to be uninteresting

347

continue

354

continue

348

elif dsrc in d1 and ddst in d1:

355

elif dsrc in d1 and ddst in d1:

349

# directory wasn't entirely moved locally

356

# directory wasn't entirely moved locally

350

invalid.add(dsrc)

357

invalid.add(dsrc)

351

elif dsrc in d2 and ddst in d2:

358

elif dsrc in d2 and ddst in d2:

352

# directory wasn't entirely moved remotely

359

# directory wasn't entirely moved remotely

353

invalid.add(dsrc)

360

invalid.add(dsrc)

354

elif dsrc in dirmove and dirmove[dsrc] != ddst:

361

elif dsrc in dirmove and dirmove[dsrc] != ddst:

355

# files from the same directory moved to two different places

362

# files from the same directory moved to two different places

356

invalid.add(dsrc)

363

invalid.add(dsrc)

357

else:

364

else:

358

# looks good so far

365

# looks good so far

359

dirmove[dsrc + "/"] = ddst + "/"

366

dirmove[dsrc + "/"] = ddst + "/"

360

367

361

for i in invalid:

368

for i in invalid:

362

if i in dirmove:

369

if i in dirmove:

363

del dirmove[i]

370

del dirmove[i]

364

del d1, d2, invalid

371

del d1, d2, invalid

365

372

366

if not dirmove:

373

if not dirmove:

367

return copy, movewithdir, diverge, renamedelete

374

return copy, movewithdir, diverge, renamedelete

368

375

369

for d in dirmove:

376

for d in dirmove:

370

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

377

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

371

(d, dirmove[d]))

378

(d, dirmove[d]))

372

379

373

# check unaccounted nonoverlapping files against directory moves

380

# check unaccounted nonoverlapping files against directory moves

374

for f in u1 + u2:

381

for f in u1 + u2:

375

if f not in fullcopy:

382

if f not in fullcopy:

376

for d in dirmove:

383

for d in dirmove:

377

if f.startswith(d):

384

if f.startswith(d):

378

# new file added in a directory that was moved, move it

385

# new file added in a directory that was moved, move it

379

df = dirmove[d] + f[len(d):]

386

df = dirmove[d] + f[len(d):]

380

if df not in copy:

387

if df not in copy:

381

movewithdir[f] = df

388

movewithdir[f] = df

382

repo.ui.debug((" pending file src: '%s' -> "

389

repo.ui.debug((" pending file src: '%s' -> "

383

"dst: '%s'\n") % (f, df))

390

"dst: '%s'\n") % (f, df))

384

break

391

break

385

392

386

return copy, movewithdir, diverge, renamedelete

393

return copy, movewithdir, diverge, renamedelete

387

394

388

def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):

395

def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):

389

"""

396

"""

390

check possible copies of f from m1 to m2

397

check possible copies of f from m1 to m2

391

398

392

ctx = function accepting (filename, node) that returns a filectx.

399

ctx = function accepting (filename, node) that returns a filectx.

393

f = the filename to check

400

f = the filename to check

394

m1 = the source manifest

401

m1 = the source manifest

395

m2 = the destination manifest

402

m2 = the destination manifest

396

ca = the changectx of the common ancestor

403

ca = the changectx of the common ancestor

397

limit = the rev number to not search beyond

404

limit = the rev number to not search beyond

398

diverge = record all diverges in this dict

405

diverge = record all diverges in this dict

399

copy = record all non-divergent copies in this dict

406

copy = record all non-divergent copies in this dict

400

fullcopy = record all copies in this dict

407

fullcopy = record all copies in this dict

401

"""

408

"""

402

409

403

ma = ca.manifest()

410

ma = ca.manifest()

404

411

405

def _related(f1, f2, limit):

412

def _related(f1, f2, limit):

406

# Walk back to common ancestor to see if the two files originate

413

# Walk back to common ancestor to see if the two files originate

407

# from the same file. Since workingfilectx's rev() is None it messes

414

# from the same file. Since workingfilectx's rev() is None it messes

408

# up the integer comparison logic, hence the pre-step check for

415

# up the integer comparison logic, hence the pre-step check for

409

# None (f1 and f2 can only be workingfilectx's initially).

416

# None (f1 and f2 can only be workingfilectx's initially).

410

417

411

if f1 == f2:

418

if f1 == f2:

412

return f1 # a match

419

return f1 # a match

413

420

414

g1, g2 = f1.ancestors(), f2.ancestors()

421

g1, g2 = f1.ancestors(), f2.ancestors()

415

try:

422

try:

416

f1r, f2r = f1.rev(), f2.rev()

423

f1r, f2r = f1.rev(), f2.rev()

417

424

418

if f1r is None:

425

if f1r is None:

419

f1 = g1.next()

426

f1 = g1.next()

420

if f2r is None:

427

if f2r is None:

421

f2 = g2.next()

428

f2 = g2.next()

422

429

423

while True:

430

while True:

424

f1r, f2r = f1.rev(), f2.rev()

431

f1r, f2r = f1.rev(), f2.rev()

425

if f1r > f2r:

432

if f1r > f2r:

426

f1 = g1.next()

433

f1 = g1.next()

427

elif f2r > f1r:

434

elif f2r > f1r:

428

f2 = g2.next()

435

f2 = g2.next()

429

elif f1 == f2:

436

elif f1 == f2:

430

return f1 # a match

437

return f1 # a match

431

elif f1r == f2r or f1r < limit or f2r < limit:

438

elif f1r == f2r or f1r < limit or f2r < limit:

432

return False # copy no longer relevant

439

return False # copy no longer relevant

433

except StopIteration:

440

except StopIteration:

434

return False

441

return False

435

442

436

of = None

443

of = None

437

seen = set([f])

444

seen = set([f])

438

for oc in ctx(f, m1[f]).ancestors():

445

for oc in ctx(f, m1[f]).ancestors():

439

ocr = oc.rev()

446

ocr = oc.rev()

440

of = oc.path()

447

of = oc.path()

441

if of in seen:

448

if of in seen:

442

# check limit late - grab last rename before

449

# check limit late - grab last rename before

443

if ocr < limit:

450

if ocr < limit:

444

break

451

break

445

continue

452

continue

446

seen.add(of)

453

seen.add(of)

447

454

448

fullcopy[f] = of # remember for dir rename detection

455

fullcopy[f] = of # remember for dir rename detection

449

if of not in m2:

456

if of not in m2:

450

continue # no match, keep looking

457

continue # no match, keep looking

451

if m2[of] == ma.get(of):

458

if m2[of] == ma.get(of):

452

break # no merge needed, quit early

459

break # no merge needed, quit early

453

c2 = ctx(of, m2[of])

460

c2 = ctx(of, m2[of])

454

cr = _related(oc, c2, ca.rev())

461

cr = _related(oc, c2, ca.rev())

455

if cr and (of == f or of == c2.path()): # non-divergent

462

if cr and (of == f or of == c2.path()): # non-divergent

456

copy[f] = of

463

copy[f] = of

457

of = None

464

of = None

458

break

465

break

459

466

460

if of in ma:

467

if of in ma:

461

diverge.setdefault(of, []).append(f)

468

diverge.setdefault(of, []).append(f)

462

469

463

def duplicatecopies(repo, rev, fromrev, skiprev=None):

470

def duplicatecopies(repo, rev, fromrev, skiprev=None):

464

'''reproduce copies from fromrev to rev in the dirstate

471

'''reproduce copies from fromrev to rev in the dirstate

465

472

466

If skiprev is specified, it's a revision that should be used to

473

If skiprev is specified, it's a revision that should be used to

467

filter copy records. Any copies that occur between fromrev and

474

filter copy records. Any copies that occur between fromrev and

468

skiprev will not be duplicated, even if they appear in the set of

475

skiprev will not be duplicated, even if they appear in the set of

469

copies between fromrev and rev.

476

copies between fromrev and rev.

470

'''

477

'''

471

exclude = {}

478

exclude = {}

472

if skiprev is not None:

479

if skiprev is not None:

473

exclude = pathcopies(repo[fromrev], repo[skiprev])

480

exclude = pathcopies(repo[fromrev], repo[skiprev])

474

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

481

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

475

# copies.pathcopies returns backward renames, so dst might not

482

# copies.pathcopies returns backward renames, so dst might not

476

# actually be in the dirstate

483

# actually be in the dirstate

477

if dst in exclude:

484

if dst in exclude:

478

continue

485

continue

479

if repo.dirstate[dst] in "nma":

486

if repo.dirstate[dst] in "nma":

480

repo.dirstate.copy(src, dst)

487

repo.dirstate.copy(src, dst)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             import util
             import heapq
             def _nonoverlap(d1, d2, d3):
                 "Return list of elements in d1 not in d2 or d3"
                 return sorted([d for d in d1 if d not in d3 and d not in d2])
             def _dirname(f):
                 s = f.rfind("/")
                 if s == -1:
                     return ""
                 return f[:s]
             def _findlimit(repo, a, b):
                 """
                 Find the last revision that needs to be checked to ensure that a full
                 transitive closure for file copies can be properly calculated.
                 Generally, this means finding the earliest revision number that's an
                 ancestor of a or b but not both, except when a or b is a direct descendent
                 of the other, in which case we can return the minimum revnum of a and b.
                 None if no such revision exists.
                 """
                 # basic idea:
                 # - mark a and b with different sides
                 # - if a parent's children are all on the same side, the parent is
                 #   on that side, otherwise it is on no side
                 # - walk the graph in topological order with the help of a heap;
                 #   - add unseen parents to side map
                 #   - clear side of any parent that has children on different sides
                 #   - track number of interesting revs that might still be on a side
                 #   - track the lowest interesting rev seen
                 #   - quit when interesting revs is zero
                 cl = repo.changelog
                 working = len(cl) # pseudo rev for the working directory
                 if a is None:
                     a = working
                 if b is None:
                     b = working
                 side = {a: -1, b: 1}
                 visit = [-a, -b]
                 heapq.heapify(visit)
                 interesting = len(visit)
                 hascommonancestor = False
                 limit = working
                 while interesting:
                     r = -heapq.heappop(visit)
                     if r == working:
                         parents = [cl.rev(p) for p in repo.dirstate.parents()]
                     else:
                         parents = cl.parentrevs(r)
                     for p in parents:
                         if p < 0:
                             continue
                         if p not in side:
                             # first time we see p; add it to visit
                             side[p] = side[r]
                             if side[p]:
                                 interesting += 1
                             heapq.heappush(visit, -p)
                         elif side[p] and side[p] != side[r]:
                             # p was interesting but now we know better
                             side[p] = 0
                             interesting -= 1
                             hascommonancestor = True
                     if side[r]:
                         limit = r # lowest rev visited
                         interesting -= 1
                 if not hascommonancestor:
                     return None
                 # Consider the following flow (see test-commit-amend.t under issue4405):
                 # 1/ File 'a0' committed
                 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
                 # 3/ Move back to first commit
                 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
                 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
                 #
                 # During the amend in step five, we will be in this state:
                 #
                 # @  3 temporary amend commit for a1-amend
                 # |
                 # o  2 a1-amend
                 # |
                 # | o  1 a1
                 # |/
                 # o  0 a0
                 #
                 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
                 # yet the filelog has the copy information in rev 1 and we will not look
                 # back far enough unless we also look at the a and b as candidates.
                 # This only occurs when a is a descendent of b or visa-versa.
                 return min(limit, a, b)
             def _chain(src, dst, a, b):
                 '''chain two sets of copies a->b'''
                 t = a.copy()
                 for k, v in b.iteritems():
                     if v in t:
                         # found a chain
                         if t[v] != k:
                             # file wasn't renamed back to itself
                             t[k] = t[v]
                         if v not in dst:
                             # chain was a rename, not a copy
                             del t[v]
                     if v in src:
                         # file is a copy of an existing file
                         t[k] = v
                 # remove criss-crossed copies
                 for k, v in t.items():
                     if k in src and v in dst:
                         del t[k]
                 return t
             def _tracefile(fctx, am, limit=-1):
                 '''return file context that is the ancestor of fctx present in ancestor
                 manifest am, stopping after the first ancestor lower than limit'''
                 for f in fctx.ancestors():
                     if am.get(f.path(), None) == f.filenode():
                         return f
                     if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
                         return None
             def _dirstatecopies(d):
                 ds = d._repo.dirstate
                 c = ds.copies().copy()
                 for k in c.keys():
                     if ds[k] not in 'anm':
                         del c[k]
                 return c
+            def _computeforwardmissing(a, b):
+                """Computes which files are in b but not a.
+                This is its own function so extensions can easily wrap this call to see what
+                files _forwardcopies is about to process.
+                """
+                missing = set(b.manifest().iterkeys())
+                missing.difference_update(a.manifest().iterkeys())
+                return missing
             def _forwardcopies(a, b):
                 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
                 # check for working copy
                 w = None
                 if b.rev() is None:
                     w = b
                     b = w.p1()
                     if a == b:
                         # short-circuit to avoid issues with merge states
                         return _dirstatecopies(w)
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 limit = _findlimit(a._repo, a.rev(), b.rev())
                 if limit is None:
                     limit = -1
                 am = a.manifest()
                 # find where new files came from
                 # we currently don't try to find where old files went, too expensive
                 # this means we can miss a case like 'hg rm b; hg cp a b'
                 cm = {}
-                missing = set(b.manifest().iterkeys())
+                missing = _computeforwardmissing(a, b)
-                missing.difference_update(a.manifest().iterkeys())
                 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
                 for f in missing:
                     fctx = b[f]
                     fctx._ancestrycontext = ancestrycontext
                     ofctx = _tracefile(fctx, am, limit)
                     if ofctx:
                         cm[f] = ofctx.path()
                 # combine copies from dirstate if necessary
                 if w is not None:
                     cm = _chain(a, w, cm, _dirstatecopies(w))
                 return cm
             def _backwardrenames(a, b):
                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
                 f = _forwardcopies(b, a)
                 r = {}
                 for k, v in sorted(f.iteritems()):
                     # remove copies
                     if v in a:
                         continue
                     r[v] = k
                 return r
             def pathcopies(x, y):
                 '''find {dst@y: src@x} copy mapping for directed compare'''
                 if x == y or not x or not y:
                     return {}
                 a = y.ancestor(x)
                 if a == x:
                     return _forwardcopies(x, y)
                 if a == y:
                     return _backwardrenames(x, y)
                 return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))
             def _computenonoverlap(repo, m1, m2, ma):
                 """Computes the files exclusive to m1 and m2.
                 This is its own function so extensions can easily wrap this call to see what
                 files mergecopies is about to process.
                 """
                 u1 = _nonoverlap(m1, m2, ma)
                 u2 = _nonoverlap(m2, m1, ma)
                 if u1:
                     repo.ui.debug("  unmatched files in local:\n   %s\n"
                                   % "\n   ".join(u1))
                 if u2:
                     repo.ui.debug("  unmatched files in other:\n   %s\n"
                                   % "\n   ".join(u2))
                 return u1, u2
             def mergecopies(repo, c1, c2, ca):
                 """
                 Find moves and copies between context c1 and c2 that are relevant
                 for merging.
                 Returns four dicts: "copy", "movewithdir", "diverge", and
                 "renamedelete".
                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.
                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.
                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.
                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return {}, {}, {}, {}
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return repo.dirstate.copies(), {}, {}, {}
                 limit = _findlimit(repo, c1.rev(), c2.rev())
                 if limit is None:
                     # no common ancestor, no copies
                     return {}, {}, {}, {}
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 ma = ca.manifest()
                 def makectx(f, n):
                     if len(n) != 20: # in a working context?
                         if c1.rev() is None:
                             return c1.filectx(f)
                         return c2.filectx(f)
                     return repo.filectx(f, fileid=n)
                 ctx = util.lrucachefunc(makectx)
                 copy = {}
                 movewithdir = {}
                 fullcopy = {}
                 diverge = {}
                 repo.ui.debug("  searching for copies back to rev %d\n" % limit)
                 u1, u2 = _computenonoverlap(repo, m1, m2, ma)
                 for f in u1:
                     checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)
                 for f in u2:
                     checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)
                 renamedelete = {}
                 renamedelete2 = set()
                 diverge2 = set()
                 for of, fl in diverge.items():
                     if len(fl) == 1 or of in c1 or of in c2:
                         del diverge[of] # not actually divergent, or not a rename
                         if of not in c1 and of not in c2:
                             # renamed on one side, deleted on the other side, but filter
                             # out files that have been renamed and then deleted
                             renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                             renamedelete2.update(fl) # reverse map for below
                     else:
                         diverge2.update(fl) # reverse map for below
                 bothnew = sorted([d for d in m1 if d in m2 and d not in ma])
                 if bothnew:
                     repo.ui.debug("  unmatched files new in both:\n   %s\n"
                                   % "\n   ".join(bothnew))
                 bothdiverge, _copy, _fullcopy = {}, {}, {}
                 for f in bothnew:
                     checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
                     checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
                 for of, fl in bothdiverge.items():
                     if len(fl) == 2 and fl[0] == fl[1]:
                         copy[fl[0]] = of # not actually divergent, just matching renames
                 if fullcopy and repo.ui.debugflag:
                     repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                                   "% = renamed and deleted):\n")
                     for f in sorted(fullcopy):
                         note = ""
                         if f in copy:
                             note += "*"
                         if f in diverge2:
                             note += "!"
                         if f in renamedelete2:
                             note += "%"
                         repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                                           note))
                 del diverge2
                 if not fullcopy:
                     return copy, movewithdir, diverge, renamedelete
                 repo.ui.debug("  checking for directory renames\n")
                 # generate a directory move map
                 d1, d2 = c1.dirs(), c2.dirs()
                 d1.addpath('/')
                 d2.addpath('/')
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in fullcopy.iteritems():
                     dsrc, ddst = _dirname(src), _dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif dsrc in d1 and ddst in d1:
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc)
                     elif dsrc in d2 and ddst in d2:
                         # directory wasn't entirely moved remotely
                         invalid.add(dsrc)
                     elif dsrc in dirmove and dirmove[dsrc] != ddst:
                         # files from the same directory moved to two different places
                         invalid.add(dsrc)
                     else:
                         # looks good so far
                         dirmove[dsrc + "/"] = ddst + "/"
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del d1, d2, invalid
                 if not dirmove:
                     return copy, movewithdir, diverge, renamedelete
                 for d in dirmove:
                     repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                                   (d, dirmove[d]))
                 # check unaccounted nonoverlapping files against directory moves
                 for f in u1 + u2:
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d):]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(("   pending file src: '%s' -> "
                                                    "dst: '%s'\n") % (f, df))
                                 break
                 return copy, movewithdir, diverge, renamedelete
             def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
                 """
                 check possible copies of f from m1 to m2
                 ctx = function accepting (filename, node) that returns a filectx.
                 f = the filename to check
                 m1 = the source manifest
                 m2 = the destination manifest
                 ca = the changectx of the common ancestor
                 limit = the rev number to not search beyond
                 diverge = record all diverges in this dict
                 copy = record all non-divergent copies in this dict
                 fullcopy = record all copies in this dict
                 """
                 ma = ca.manifest()
                 def _related(f1, f2, limit):
                     # Walk back to common ancestor to see if the two files originate
                     # from the same file. Since workingfilectx's rev() is None it messes
                     # up the integer comparison logic, hence the pre-step check for
                     # None (f1 and f2 can only be workingfilectx's initially).
                     if f1 == f2:
                         return f1 # a match
                     g1, g2 = f1.ancestors(), f2.ancestors()
                     try:
                         f1r, f2r = f1.rev(), f2.rev()
                         if f1r is None:
                             f1 = g1.next()
                         if f2r is None:
                             f2 = g2.next()
                         while True:
                             f1r, f2r = f1.rev(), f2.rev()
                             if f1r > f2r:
                                 f1 = g1.next()
                             elif f2r > f1r:
                                 f2 = g2.next()
                             elif f1 == f2:
                                 return f1 # a match
                             elif f1r == f2r or f1r < limit or f2r < limit:
                                 return False # copy no longer relevant
                     except StopIteration:
                         return False
                 of = None
                 seen = set([f])
                 for oc in ctx(f, m1[f]).ancestors():
                     ocr = oc.rev()
                     of = oc.path()
                     if of in seen:
                         # check limit late - grab last rename before
                         if ocr < limit:
                             break
                         continue
                     seen.add(of)
                     fullcopy[f] = of # remember for dir rename detection
                     if of not in m2:
                         continue # no match, keep looking
                     if m2[of] == ma.get(of):
                         break # no merge needed, quit early
                     c2 = ctx(of, m2[of])
                     cr = _related(oc, c2, ca.rev())
                     if cr and (of == f or of == c2.path()): # non-divergent
                         copy[f] = of
                         of = None
                         break
                 if of in ma:
                     diverge.setdefault(of, []).append(f)
             def duplicatecopies(repo, rev, fromrev, skiprev=None):
                 '''reproduce copies from fromrev to rev in the dirstate
                 If skiprev is specified, it's a revision that should be used to
                 filter copy records. Any copies that occur between fromrev and
                 skiprev will not be duplicated, even if they appear in the set of
                 copies between fromrev and rev.
                 '''
                 exclude = {}
                 if skiprev is not None:
                     exclude = pathcopies(repo[fromrev], repo[skiprev])
                 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
                     # copies.pathcopies returns backward renames, so dst might not
                     # actually be in the dirstate
                     if dst in exclude:
                         continue
                     if repo.dirstate[dst] in "nma":
                         repo.dirstate.copy(src, dst)