upstream/mercurial-mirror Commit - r24186:61aadba2

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

import util

8

import util

9

import heapq

9

import heapq

10

11

def _dirname(f):

11

def _dirname(f):

12

s = f.rfind("/")

12

s = f.rfind("/")

13

if s == -1:

13

if s == -1:

14

return ""

14

return ""

15

return f[:s]

15

return f[:s]

16

17

def _findlimit(repo, a, b):

17

def _findlimit(repo, a, b):

18

"""

18

"""

19

Find the last revision that needs to be checked to ensure that a full

19

Find the last revision that needs to be checked to ensure that a full

20

transitive closure for file copies can be properly calculated.

20

transitive closure for file copies can be properly calculated.

21

Generally, this means finding the earliest revision number that's an

21

Generally, this means finding the earliest revision number that's an

22

ancestor of a or b but not both, except when a or b is a direct descendent

22

ancestor of a or b but not both, except when a or b is a direct descendent

23

of the other, in which case we can return the minimum revnum of a and b.

23

of the other, in which case we can return the minimum revnum of a and b.

24

None if no such revision exists.

24

None if no such revision exists.

25

"""

25

"""

26

27

# basic idea:

27

# basic idea:

28

# - mark a and b with different sides

28

# - mark a and b with different sides

29

# - if a parent's children are all on the same side, the parent is

29

# - if a parent's children are all on the same side, the parent is

30

# on that side, otherwise it is on no side

30

# on that side, otherwise it is on no side

31

# - walk the graph in topological order with the help of a heap;

31

# - walk the graph in topological order with the help of a heap;

32

# - add unseen parents to side map

32

# - add unseen parents to side map

33

# - clear side of any parent that has children on different sides

33

# - clear side of any parent that has children on different sides

34

# - track number of interesting revs that might still be on a side

34

# - track number of interesting revs that might still be on a side

35

# - track the lowest interesting rev seen

35

# - track the lowest interesting rev seen

36

# - quit when interesting revs is zero

36

# - quit when interesting revs is zero

37

38

cl = repo.changelog

38

cl = repo.changelog

39

working = len(cl) # pseudo rev for the working directory

39

working = len(cl) # pseudo rev for the working directory

40

if a is None:

40

if a is None:

41

a = working

41

a = working

42

if b is None:

42

if b is None:

43

b = working

43

b = working

44

45

side = {a: -1, b: 1}

45

side = {a: -1, b: 1}

46

visit = [-a, -b]

46

visit = [-a, -b]

47

heapq.heapify(visit)

47

heapq.heapify(visit)

48

interesting = len(visit)

48

interesting = len(visit)

49

hascommonancestor = False

49

hascommonancestor = False

50

limit = working

50

limit = working

51

52

while interesting:

52

while interesting:

53

r = -heapq.heappop(visit)

53

r = -heapq.heappop(visit)

54

if r == working:

54

if r == working:

55

parents = [cl.rev(p) for p in repo.dirstate.parents()]

55

parents = [cl.rev(p) for p in repo.dirstate.parents()]

56

else:

56

else:

57

parents = cl.parentrevs(r)

57

parents = cl.parentrevs(r)

58

for p in parents:

58

for p in parents:

59

if p < 0:

59

if p < 0:

60

continue

60

continue

61

if p not in side:

61

if p not in side:

62

# first time we see p; add it to visit

62

# first time we see p; add it to visit

63

side[p] = side[r]

63

side[p] = side[r]

64

if side[p]:

64

if side[p]:

65

interesting += 1

65

interesting += 1

66

heapq.heappush(visit, -p)

66

heapq.heappush(visit, -p)

67

elif side[p] and side[p] != side[r]:

67

elif side[p] and side[p] != side[r]:

68

# p was interesting but now we know better

68

# p was interesting but now we know better

69

side[p] = 0

69

side[p] = 0

70

interesting -= 1

70

interesting -= 1

71

hascommonancestor = True

71

hascommonancestor = True

72

if side[r]:

72

if side[r]:

73

limit = r # lowest rev visited

73

limit = r # lowest rev visited

74

interesting -= 1

74

interesting -= 1

75

76

if not hascommonancestor:

76

if not hascommonancestor:

77

return None

77

return None

78

79

# Consider the following flow (see test-commit-amend.t under issue4405):

79

# Consider the following flow (see test-commit-amend.t under issue4405):

80

# 1/ File 'a0' committed

80

# 1/ File 'a0' committed

81

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

81

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

82

# 3/ Move back to first commit

82

# 3/ Move back to first commit

83

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

83

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

84

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

84

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

85

#

85

#

86

# During the amend in step five, we will be in this state:

86

# During the amend in step five, we will be in this state:

87

#

87

#

88

# @ 3 temporary amend commit for a1-amend

88

# @ 3 temporary amend commit for a1-amend

89

# |

89

# |

90

# o 2 a1-amend

90

# o 2 a1-amend

91

# |

91

# |

92

# | o 1 a1

92

# | o 1 a1

93

# |/

93

# |/

94

# o 0 a0

94

# o 0 a0

95

#

95

#

96

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

96

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

97

# yet the filelog has the copy information in rev 1 and we will not look

97

# yet the filelog has the copy information in rev 1 and we will not look

98

# back far enough unless we also look at the a and b as candidates.

98

# back far enough unless we also look at the a and b as candidates.

99

# This only occurs when a is a descendent of b or visa-versa.

99

# This only occurs when a is a descendent of b or visa-versa.

100

return min(limit, a, b)

100

return min(limit, a, b)

101

102

def _chain(src, dst, a, b):

102

def _chain(src, dst, a, b):

103

'''chain two sets of copies a->b'''

103

'''chain two sets of copies a->b'''

104

t = a.copy()

104

t = a.copy()

105

for k, v in b.iteritems():

105

for k, v in b.iteritems():

106

if v in t:

106

if v in t:

107

# found a chain

107

# found a chain

108

if t[v] != k:

108

if t[v] != k:

109

# file wasn't renamed back to itself

109

# file wasn't renamed back to itself

110

t[k] = t[v]

110

t[k] = t[v]

111

if v not in dst:

111

if v not in dst:

112

# chain was a rename, not a copy

112

# chain was a rename, not a copy

113

del t[v]

113

del t[v]

114

if v in src:

114

if v in src:

115

# file is a copy of an existing file

115

# file is a copy of an existing file

116

t[k] = v

116

t[k] = v

117

118

# remove criss-crossed copies

118

# remove criss-crossed copies

119

for k, v in t.items():

119

for k, v in t.items():

120

if k in src and v in dst:

120

if k in src and v in dst:

121

del t[k]

121

del t[k]

122

123

return t

123

return t

124

125

def _tracefile(fctx, am, limit=-1):

125

def _tracefile(fctx, am, limit=-1):

126

'''return file context that is the ancestor of fctx present in ancestor

126

'''return file context that is the ancestor of fctx present in ancestor

127

manifest am, stopping after the first ancestor lower than limit'''

127

manifest am, stopping after the first ancestor lower than limit'''

128

129

for f in fctx.ancestors():

129

for f in fctx.ancestors():

130

if am.get(f.path(), None) == f.filenode():

130

if am.get(f.path(), None) == f.filenode():

131

return f

131

return f

132

if limit >= 0 and f.linkrev() < limit and f.rev() < limit:

132

if limit >= 0 and f.linkrev() < limit and f.rev() < limit:

133

return None

133

return None

134

135

def _dirstatecopies(d):

135

def _dirstatecopies(d):

136

ds = d._repo.dirstate

136

ds = d._repo.dirstate

137

c = ds.copies().copy()

137

c = ds.copies().copy()

138

for k in c.keys():

138

for k in c.keys():

139

if ds[k] not in 'anm':

139

if ds[k] not in 'anm':

140

del c[k]

140

del c[k]

141

return c

141

return c

142

143

def _computeforwardmissing(a, b):

143

def _computeforwardmissing(a, b):

144

"""Computes which files are in b but not a.

144

"""Computes which files are in b but not a.

145

This is its own function so extensions can easily wrap this call to see what

145

This is its own function so extensions can easily wrap this call to see what

146

files _forwardcopies is about to process.

146

files _forwardcopies is about to process.

147

"""

147

"""

148

return b.manifest().filesnotin(a.manifest())

148

return b.manifest().filesnotin(a.manifest())

149

150

def _forwardcopies(a, b):

150

def _forwardcopies(a, b):

151

'''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

151

'''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

152

153

# check for working copy

153

# check for working copy

154

w = None

154

w = None

155

if b.rev() is None:

155

if b.rev() is None:

156

w = b

156

w = b

157

b = w.p1()

157

b = w.p1()

158

if a == b:

158

if a == b:

159

# short-circuit to avoid issues with merge states

159

# short-circuit to avoid issues with merge states

160

return _dirstatecopies(w)

160

return _dirstatecopies(w)

161

162

# files might have to be traced back to the fctx parent of the last

162

# files might have to be traced back to the fctx parent of the last

163

# one-side-only changeset, but not further back than that

163

# one-side-only changeset, but not further back than that

164

limit = _findlimit(a._repo, a.rev(), b.rev())

164

limit = _findlimit(a._repo, a.rev(), b.rev())

165

if limit is None:

165

if limit is None:

166

limit = -1

166

limit = -1

167

am = a.manifest()

167

am = a.manifest()

168

169

# find where new files came from

169

# find where new files came from

170

# we currently don't try to find where old files went, too expensive

170

# we currently don't try to find where old files went, too expensive

171

# this means we can miss a case like 'hg rm b; hg cp a b'

171

# this means we can miss a case like 'hg rm b; hg cp a b'

172

cm = {}

172

cm = {}

173

missing = _computeforwardmissing(a, b)

173

missing = _computeforwardmissing(a, b)

174

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

174

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

175

for f in missing:

175

for f in missing:

176

fctx = b[f]

176

fctx = b[f]

177

fctx._ancestrycontext = ancestrycontext

177

fctx._ancestrycontext = ancestrycontext

178

ofctx = _tracefile(fctx, am, limit)

178

ofctx = _tracefile(fctx, am, limit)

179

if ofctx:

179

if ofctx:

180

cm[f] = ofctx.path()

180

cm[f] = ofctx.path()

181

182

# combine copies from dirstate if necessary

182

# combine copies from dirstate if necessary

183

if w is not None:

183

if w is not None:

184

cm = _chain(a, w, cm, _dirstatecopies(w))

184

cm = _chain(a, w, cm, _dirstatecopies(w))

185

186

return cm

186

return cm

187

188

def _backwardrenames(a, b):

188

def _backwardrenames(a, b):

189

# Even though we're not taking copies into account, 1:n rename situations

189

# Even though we're not taking copies into account, 1:n rename situations

190

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

190

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

191

# arbitrarily pick one of the renames.

191

# arbitrarily pick one of the renames.

192

f = _forwardcopies(b, a)

192

f = _forwardcopies(b, a)

193

r = {}

193

r = {}

194

for k, v in sorted(f.iteritems()):

194

for k, v in sorted(f.iteritems()):

195

# remove copies

195

# remove copies

196

if v in a:

196

if v in a:

197

continue

197

continue

198

r[v] = k

198

r[v] = k

199

return r

199

return r

200

201

def pathcopies(x, y):

201

def pathcopies(x, y):

202

'''find {dst@y: src@x} copy mapping for directed compare'''

202

'''find {dst@y: src@x} copy mapping for directed compare'''

203

if x == y or not x or not y:

203

if x == y or not x or not y:

204

return {}

204

return {}

205

a = y.ancestor(x)

205

a = y.ancestor(x)

206

if a == x:

206

if a == x:

207

return _forwardcopies(x, y)

207

return _forwardcopies(x, y)

208

if a == y:

208

if a == y:

209

return _backwardrenames(x, y)

209

return _backwardrenames(x, y)

210

return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))

210

return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))

211

212

def _computenonoverlap(repo, m1, m2, ma):

212

def _computenonoverlap(repo, m1, m2, ma):

213

"""Computes the files exclusive to m1 and m2.

213

"""Computes the files exclusive to m1 and m2.

214

This is its own function so extensions can easily wrap this call to see what

214

This is its own function so extensions can easily wrap this call to see what

215

files mergecopies is about to process.

215

files mergecopies is about to process.

216

"""

216

"""

217

addedinm1 = m1.filesnotin(ma)

217

addedinm1 = m1.filesnotin(ma)

218

addedinm2 = m2.filesnotin(ma)

218

addedinm2 = m2.filesnotin(ma)

219

u1 = sorted(addedinm1 - addedinm2)

219

u1 = sorted(addedinm1 - addedinm2)

220

u2 = sorted(addedinm2 - addedinm1)

220

u2 = sorted(addedinm2 - addedinm1)

221

222

if u1:

222

if u1:

223

repo.ui.debug(" unmatched files in local:\n %s\n"

223

repo.ui.debug(" unmatched files in local:\n %s\n"

224

% "\n ".join(u1))

224

% "\n ".join(u1))

225

if u2:

225

if u2:

226

repo.ui.debug(" unmatched files in other:\n %s\n"

226

repo.ui.debug(" unmatched files in other:\n %s\n"

227

% "\n ".join(u2))

227

% "\n ".join(u2))

228

return u1, u2

228

return u1, u2

229

230

def mergecopies(repo, c1, c2, ca):

230

def mergecopies(repo, c1, c2, ca):

231

"""

231

"""

232

Find moves and copies between context c1 and c2 that are relevant

232

Find moves and copies between context c1 and c2 that are relevant

233

for merging.

233

for merging.

234

235

Returns four dicts: "copy", "movewithdir", "diverge", and

235

Returns four dicts: "copy", "movewithdir", "diverge", and

236

"renamedelete".

236

"renamedelete".

237

238

"copy" is a mapping from destination name -> source name,

238

"copy" is a mapping from destination name -> source name,

239

where source is in c1 and destination is in c2 or vice-versa.

239

where source is in c1 and destination is in c2 or vice-versa.

240

241

"movewithdir" is a mapping from source name -> destination name,

241

"movewithdir" is a mapping from source name -> destination name,

242

where the file at source present in one context but not the other

242

where the file at source present in one context but not the other

243

needs to be moved to destination by the merge process, because the

243

needs to be moved to destination by the merge process, because the

244

other context moved the directory it is in.

244

other context moved the directory it is in.

245

246

"diverge" is a mapping of source name -> list of destination names

246

"diverge" is a mapping of source name -> list of destination names

247

for divergent renames.

247

for divergent renames.

248

249

"renamedelete" is a mapping of source name -> list of destination

249

"renamedelete" is a mapping of source name -> list of destination

250

names for files deleted in c1 that were renamed in c2 or vice-versa.

250

names for files deleted in c1 that were renamed in c2 or vice-versa.

251

"""

251

"""

252

# avoid silly behavior for update from empty dir

252

# avoid silly behavior for update from empty dir

253

if not c1 or not c2 or c1 == c2:

253

if not c1 or not c2 or c1 == c2:

254

return {}, {}, {}, {}

254

return {}, {}, {}, {}

255

256

# avoid silly behavior for parent -> working dir

256

# avoid silly behavior for parent -> working dir

257

if c2.node() is None and c1.node() == repo.dirstate.p1():

257

if c2.node() is None and c1.node() == repo.dirstate.p1():

258

return repo.dirstate.copies(), {}, {}, {}

258

return repo.dirstate.copies(), {}, {}, {}

259

260

limit = _findlimit(repo, c1.rev(), c2.rev())

260

limit = _findlimit(repo, c1.rev(), c2.rev())

261

if limit is None:

261

if limit is None:

262

# no common ancestor, no copies

262

# no common ancestor, no copies

263

return {}, {}, {}, {}

263

return {}, {}, {}, {}

264

m1 = c1.manifest()

264

m1 = c1.manifest()

265

m2 = c2.manifest()

265

m2 = c2.manifest()

266

ma = ca.manifest()

266

ma = ca.manifest()

267

268

def makectx(f, n):

268

def makectx(f, n):

269

if len(n) != 20: # in a working context?

269

if len(n) != 20: # in a working context?

270

if c1.rev() is None:

270

if c1.rev() is None:

271

return c1.filectx(f)

271

return c1.filectx(f)

272

return c2.filectx(f)

272

return c2.filectx(f)

273

return repo.filectx(f, fileid=n)

273

return repo.filectx(f, fileid=n)

274

275

ctx = util.lrucachefunc(makectx)

275

ctx = util.lrucachefunc(makectx)

276

copy = {}

276

copy = {}

277

movewithdir = {}

277

movewithdir = {}

278

fullcopy = {}

278

fullcopy = {}

279

diverge = {}

279

diverge = {}

280

281

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

281

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

282

283

u1, u2 = _computenonoverlap(repo, m1, m2, ma)

283

u1, u2 = _computenonoverlap(repo, m1, m2, ma)

284

285

for f in u1:

285

for f in u1:

286

checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)

286

checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)

287

288

for f in u2:

288

for f in u2:

289

checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)

289

checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)

290

291

renamedelete = {}

291

renamedelete = {}

292

renamedelete2 = set()

292

renamedelete2 = set()

293

diverge2 = set()

293

diverge2 = set()

294

for of, fl in diverge.items():

294

for of, fl in diverge.items():

295

if len(fl) == 1 or of in c1 or of in c2:

295

if len(fl) == 1 or of in c1 or of in c2:

296

del diverge[of] # not actually divergent, or not a rename

296

del diverge[of] # not actually divergent, or not a rename

297

if of not in c1 and of not in c2:

297

if of not in c1 and of not in c2:

298

# renamed on one side, deleted on the other side, but filter

298

# renamed on one side, deleted on the other side, but filter

299

# out files that have been renamed and then deleted

299

# out files that have been renamed and then deleted

300

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

300

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

301

renamedelete2.update(fl) # reverse map for below

301

renamedelete2.update(fl) # reverse map for below

302

else:

302

else:

303

diverge2.update(fl) # reverse map for below

303

diverge2.update(fl) # reverse map for below

304

305

bothnew = sorted([d for d in m1 if d in m2 and d not in ma])

305

addedinm1 = m1.filesnotin(ma)

306

addedinm2 = m2.filesnotin(ma)

307

bothnew = sorted(addedinm1 & addedinm2)

306

if bothnew:

308

if bothnew:

307

repo.ui.debug(" unmatched files new in both:\n %s\n"

309

repo.ui.debug(" unmatched files new in both:\n %s\n"

308

% "\n ".join(bothnew))

310

% "\n ".join(bothnew))

309

bothdiverge, _copy, _fullcopy = {}, {}, {}

311

bothdiverge, _copy, _fullcopy = {}, {}, {}

310

for f in bothnew:

312

for f in bothnew:

311

checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)

313

checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)

312

checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)

314

checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)

313

for of, fl in bothdiverge.items():

315

for of, fl in bothdiverge.items():

314

if len(fl) == 2 and fl[0] == fl[1]:

316

if len(fl) == 2 and fl[0] == fl[1]:

315

copy[fl[0]] = of # not actually divergent, just matching renames

317

copy[fl[0]] = of # not actually divergent, just matching renames

316

318

317

if fullcopy and repo.ui.debugflag:

319

if fullcopy and repo.ui.debugflag:

318

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

320

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

319

"% = renamed and deleted):\n")

321

"% = renamed and deleted):\n")

320

for f in sorted(fullcopy):

322

for f in sorted(fullcopy):

321

note = ""

323

note = ""

322

if f in copy:

324

if f in copy:

323

note += "*"

325

note += "*"

324

if f in diverge2:

326

if f in diverge2:

325

note += "!"

327

note += "!"

326

if f in renamedelete2:

328

if f in renamedelete2:

327

note += "%"

329

note += "%"

328

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

330

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

329

note))

331

note))

330

del diverge2

332

del diverge2

331

333

332

if not fullcopy:

334

if not fullcopy:

333

return copy, movewithdir, diverge, renamedelete

335

return copy, movewithdir, diverge, renamedelete

334

336

335

repo.ui.debug(" checking for directory renames\n")

337

repo.ui.debug(" checking for directory renames\n")

336

338

337

# generate a directory move map

339

# generate a directory move map

338

d1, d2 = c1.dirs(), c2.dirs()

340

d1, d2 = c1.dirs(), c2.dirs()

339

d1.addpath('/')

341

d1.addpath('/')

340

d2.addpath('/')

342

d2.addpath('/')

341

invalid = set()

343

invalid = set()

342

dirmove = {}

344

dirmove = {}

343

345

344

# examine each file copy for a potential directory move, which is

346

# examine each file copy for a potential directory move, which is

345

# when all the files in a directory are moved to a new directory

347

# when all the files in a directory are moved to a new directory

346

for dst, src in fullcopy.iteritems():

348

for dst, src in fullcopy.iteritems():

347

dsrc, ddst = _dirname(src), _dirname(dst)

349

dsrc, ddst = _dirname(src), _dirname(dst)

348

if dsrc in invalid:

350

if dsrc in invalid:

349

# already seen to be uninteresting

351

# already seen to be uninteresting

350

continue

352

continue

351

elif dsrc in d1 and ddst in d1:

353

elif dsrc in d1 and ddst in d1:

352

# directory wasn't entirely moved locally

354

# directory wasn't entirely moved locally

353

invalid.add(dsrc)

355

invalid.add(dsrc)

354

elif dsrc in d2 and ddst in d2:

356

elif dsrc in d2 and ddst in d2:

355

# directory wasn't entirely moved remotely

357

# directory wasn't entirely moved remotely

356

invalid.add(dsrc)

358

invalid.add(dsrc)

357

elif dsrc in dirmove and dirmove[dsrc] != ddst:

359

elif dsrc in dirmove and dirmove[dsrc] != ddst:

358

# files from the same directory moved to two different places

360

# files from the same directory moved to two different places

359

invalid.add(dsrc)

361

invalid.add(dsrc)

360

else:

362

else:

361

# looks good so far

363

# looks good so far

362

dirmove[dsrc + "/"] = ddst + "/"

364

dirmove[dsrc + "/"] = ddst + "/"

363

365

364

for i in invalid:

366

for i in invalid:

365

if i in dirmove:

367

if i in dirmove:

366

del dirmove[i]

368

del dirmove[i]

367

del d1, d2, invalid

369

del d1, d2, invalid

368

370

369

if not dirmove:

371

if not dirmove:

370

return copy, movewithdir, diverge, renamedelete

372

return copy, movewithdir, diverge, renamedelete

371

373

372

for d in dirmove:

374

for d in dirmove:

373

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

375

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

374

(d, dirmove[d]))

376

(d, dirmove[d]))

375

377

376

# check unaccounted nonoverlapping files against directory moves

378

# check unaccounted nonoverlapping files against directory moves

377

for f in u1 + u2:

379

for f in u1 + u2:

378

if f not in fullcopy:

380

if f not in fullcopy:

379

for d in dirmove:

381

for d in dirmove:

380

if f.startswith(d):

382

if f.startswith(d):

381

# new file added in a directory that was moved, move it

383

# new file added in a directory that was moved, move it

382

df = dirmove[d] + f[len(d):]

384

df = dirmove[d] + f[len(d):]

383

if df not in copy:

385

if df not in copy:

384

movewithdir[f] = df

386

movewithdir[f] = df

385

repo.ui.debug((" pending file src: '%s' -> "

387

repo.ui.debug((" pending file src: '%s' -> "

386

"dst: '%s'\n") % (f, df))

388

"dst: '%s'\n") % (f, df))

387

break

389

break

388

390

389

return copy, movewithdir, diverge, renamedelete

391

return copy, movewithdir, diverge, renamedelete

390

392

391

def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):

393

def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):

392

"""

394

"""

393

check possible copies of f from m1 to m2

395

check possible copies of f from m1 to m2

394

396

395

ctx = function accepting (filename, node) that returns a filectx.

397

ctx = function accepting (filename, node) that returns a filectx.

396

f = the filename to check

398

f = the filename to check

397

m1 = the source manifest

399

m1 = the source manifest

398

m2 = the destination manifest

400

m2 = the destination manifest

399

ca = the changectx of the common ancestor

401

ca = the changectx of the common ancestor

400

limit = the rev number to not search beyond

402

limit = the rev number to not search beyond

401

diverge = record all diverges in this dict

403

diverge = record all diverges in this dict

402

copy = record all non-divergent copies in this dict

404

copy = record all non-divergent copies in this dict

403

fullcopy = record all copies in this dict

405

fullcopy = record all copies in this dict

404

"""

406

"""

405

407

406

ma = ca.manifest()

408

ma = ca.manifest()

407

409

408

def _related(f1, f2, limit):

410

def _related(f1, f2, limit):

409

# Walk back to common ancestor to see if the two files originate

411

# Walk back to common ancestor to see if the two files originate

410

# from the same file. Since workingfilectx's rev() is None it messes

412

# from the same file. Since workingfilectx's rev() is None it messes

411

# up the integer comparison logic, hence the pre-step check for

413

# up the integer comparison logic, hence the pre-step check for

412

# None (f1 and f2 can only be workingfilectx's initially).

414

# None (f1 and f2 can only be workingfilectx's initially).

413

415

414

if f1 == f2:

416

if f1 == f2:

415

return f1 # a match

417

return f1 # a match

416

418

417

g1, g2 = f1.ancestors(), f2.ancestors()

419

g1, g2 = f1.ancestors(), f2.ancestors()

418

try:

420

try:

419

f1r, f2r = f1.rev(), f2.rev()

421

f1r, f2r = f1.rev(), f2.rev()

420

422

421

if f1r is None:

423

if f1r is None:

422

f1 = g1.next()

424

f1 = g1.next()

423

if f2r is None:

425

if f2r is None:

424

f2 = g2.next()

426

f2 = g2.next()

425

427

426

while True:

428

while True:

427

f1r, f2r = f1.rev(), f2.rev()

429

f1r, f2r = f1.rev(), f2.rev()

428

if f1r > f2r:

430

if f1r > f2r:

429

f1 = g1.next()

431

f1 = g1.next()

430

elif f2r > f1r:

432

elif f2r > f1r:

431

f2 = g2.next()

433

f2 = g2.next()

432

elif f1 == f2:

434

elif f1 == f2:

433

return f1 # a match

435

return f1 # a match

434

elif f1r == f2r or f1r < limit or f2r < limit:

436

elif f1r == f2r or f1r < limit or f2r < limit:

435

return False # copy no longer relevant

437

return False # copy no longer relevant

436

except StopIteration:

438

except StopIteration:

437

return False

439

return False

438

440

439

of = None

441

of = None

440

seen = set([f])

442

seen = set([f])

441

for oc in ctx(f, m1[f]).ancestors():

443

for oc in ctx(f, m1[f]).ancestors():

442

ocr = oc.rev()

444

ocr = oc.rev()

443

of = oc.path()

445

of = oc.path()

444

if of in seen:

446

if of in seen:

445

# check limit late - grab last rename before

447

# check limit late - grab last rename before

446

if ocr < limit:

448

if ocr < limit:

447

break

449

break

448

continue

450

continue

449

seen.add(of)

451

seen.add(of)

450

452

451

fullcopy[f] = of # remember for dir rename detection

453

fullcopy[f] = of # remember for dir rename detection

452

if of not in m2:

454

if of not in m2:

453

continue # no match, keep looking

455

continue # no match, keep looking

454

if m2[of] == ma.get(of):

456

if m2[of] == ma.get(of):

455

break # no merge needed, quit early

457

break # no merge needed, quit early

456

c2 = ctx(of, m2[of])

458

c2 = ctx(of, m2[of])

457

cr = _related(oc, c2, ca.rev())

459

cr = _related(oc, c2, ca.rev())

458

if cr and (of == f or of == c2.path()): # non-divergent

460

if cr and (of == f or of == c2.path()): # non-divergent

459

copy[f] = of

461

copy[f] = of

460

of = None

462

of = None

461

break

463

break

462

464

463

if of in ma:

465

if of in ma:

464

diverge.setdefault(of, []).append(f)

466

diverge.setdefault(of, []).append(f)

465

467

466

def duplicatecopies(repo, rev, fromrev, skiprev=None):

468

def duplicatecopies(repo, rev, fromrev, skiprev=None):

467

'''reproduce copies from fromrev to rev in the dirstate

469

'''reproduce copies from fromrev to rev in the dirstate

468

470

469

If skiprev is specified, it's a revision that should be used to

471

If skiprev is specified, it's a revision that should be used to

470

filter copy records. Any copies that occur between fromrev and

472

filter copy records. Any copies that occur between fromrev and

471

skiprev will not be duplicated, even if they appear in the set of

473

skiprev will not be duplicated, even if they appear in the set of

472

copies between fromrev and rev.

474

copies between fromrev and rev.

473

'''

475

'''

474

exclude = {}

476

exclude = {}

475

if skiprev is not None:

477

if skiprev is not None:

476

exclude = pathcopies(repo[fromrev], repo[skiprev])

478

exclude = pathcopies(repo[fromrev], repo[skiprev])

477

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

479

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

478

# copies.pathcopies returns backward renames, so dst might not

480

# copies.pathcopies returns backward renames, so dst might not

479

# actually be in the dirstate

481

# actually be in the dirstate

480

if dst in exclude:

482

if dst in exclude:

481

continue

483

continue

482

if repo.dirstate[dst] in "nma":

484

if repo.dirstate[dst] in "nma":

483

repo.dirstate.copy(src, dst)

485

repo.dirstate.copy(src, dst)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             import util
             import heapq
             def _dirname(f):
                 s = f.rfind("/")
                 if s == -1:
                     return ""
                 return f[:s]
             def _findlimit(repo, a, b):
                 """
                 Find the last revision that needs to be checked to ensure that a full
                 transitive closure for file copies can be properly calculated.
                 Generally, this means finding the earliest revision number that's an
                 ancestor of a or b but not both, except when a or b is a direct descendent
                 of the other, in which case we can return the minimum revnum of a and b.
                 None if no such revision exists.
                 """
                 # basic idea:
                 # - mark a and b with different sides
                 # - if a parent's children are all on the same side, the parent is
                 #   on that side, otherwise it is on no side
                 # - walk the graph in topological order with the help of a heap;
                 #   - add unseen parents to side map
                 #   - clear side of any parent that has children on different sides
                 #   - track number of interesting revs that might still be on a side
                 #   - track the lowest interesting rev seen
                 #   - quit when interesting revs is zero
                 cl = repo.changelog
                 working = len(cl) # pseudo rev for the working directory
                 if a is None:
                     a = working
                 if b is None:
                     b = working
                 side = {a: -1, b: 1}
                 visit = [-a, -b]
                 heapq.heapify(visit)
                 interesting = len(visit)
                 hascommonancestor = False
                 limit = working
                 while interesting:
                     r = -heapq.heappop(visit)
                     if r == working:
                         parents = [cl.rev(p) for p in repo.dirstate.parents()]
                     else:
                         parents = cl.parentrevs(r)
                     for p in parents:
                         if p < 0:
                             continue
                         if p not in side:
                             # first time we see p; add it to visit
                             side[p] = side[r]
                             if side[p]:
                                 interesting += 1
                             heapq.heappush(visit, -p)
                         elif side[p] and side[p] != side[r]:
                             # p was interesting but now we know better
                             side[p] = 0
                             interesting -= 1
                             hascommonancestor = True
                     if side[r]:
                         limit = r # lowest rev visited
                         interesting -= 1
                 if not hascommonancestor:
                     return None
                 # Consider the following flow (see test-commit-amend.t under issue4405):
                 # 1/ File 'a0' committed
                 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
                 # 3/ Move back to first commit
                 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
                 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
                 #
                 # During the amend in step five, we will be in this state:
                 #
                 # @  3 temporary amend commit for a1-amend
                 # |
                 # o  2 a1-amend
                 # |
                 # | o  1 a1
                 # |/
                 # o  0 a0
                 #
                 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
                 # yet the filelog has the copy information in rev 1 and we will not look
                 # back far enough unless we also look at the a and b as candidates.
                 # This only occurs when a is a descendent of b or visa-versa.
                 return min(limit, a, b)
             def _chain(src, dst, a, b):
                 '''chain two sets of copies a->b'''
                 t = a.copy()
                 for k, v in b.iteritems():
                     if v in t:
                         # found a chain
                         if t[v] != k:
                             # file wasn't renamed back to itself
                             t[k] = t[v]
                         if v not in dst:
                             # chain was a rename, not a copy
                             del t[v]
                     if v in src:
                         # file is a copy of an existing file
                         t[k] = v
                 # remove criss-crossed copies
                 for k, v in t.items():
                     if k in src and v in dst:
                         del t[k]
                 return t
             def _tracefile(fctx, am, limit=-1):
                 '''return file context that is the ancestor of fctx present in ancestor
                 manifest am, stopping after the first ancestor lower than limit'''
                 for f in fctx.ancestors():
                     if am.get(f.path(), None) == f.filenode():
                         return f
                     if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
                         return None
             def _dirstatecopies(d):
                 ds = d._repo.dirstate
                 c = ds.copies().copy()
                 for k in c.keys():
                     if ds[k] not in 'anm':
                         del c[k]
                 return c
             def _computeforwardmissing(a, b):
                 """Computes which files are in b but not a.
                 This is its own function so extensions can easily wrap this call to see what
                 files _forwardcopies is about to process.
                 """
                 return b.manifest().filesnotin(a.manifest())
             def _forwardcopies(a, b):
                 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
                 # check for working copy
                 w = None
                 if b.rev() is None:
                     w = b
                     b = w.p1()
                     if a == b:
                         # short-circuit to avoid issues with merge states
                         return _dirstatecopies(w)
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 limit = _findlimit(a._repo, a.rev(), b.rev())
                 if limit is None:
                     limit = -1
                 am = a.manifest()
                 # find where new files came from
                 # we currently don't try to find where old files went, too expensive
                 # this means we can miss a case like 'hg rm b; hg cp a b'
                 cm = {}
                 missing = _computeforwardmissing(a, b)
                 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
                 for f in missing:
                     fctx = b[f]
                     fctx._ancestrycontext = ancestrycontext
                     ofctx = _tracefile(fctx, am, limit)
                     if ofctx:
                         cm[f] = ofctx.path()
                 # combine copies from dirstate if necessary
                 if w is not None:
                     cm = _chain(a, w, cm, _dirstatecopies(w))
                 return cm
             def _backwardrenames(a, b):
                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
                 f = _forwardcopies(b, a)
                 r = {}
                 for k, v in sorted(f.iteritems()):
                     # remove copies
                     if v in a:
                         continue
                     r[v] = k
                 return r
             def pathcopies(x, y):
                 '''find {dst@y: src@x} copy mapping for directed compare'''
                 if x == y or not x or not y:
                     return {}
                 a = y.ancestor(x)
                 if a == x:
                     return _forwardcopies(x, y)
                 if a == y:
                     return _backwardrenames(x, y)
                 return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))
             def _computenonoverlap(repo, m1, m2, ma):
                 """Computes the files exclusive to m1 and m2.
                 This is its own function so extensions can easily wrap this call to see what
                 files mergecopies is about to process.
                 """
                 addedinm1 = m1.filesnotin(ma)
                 addedinm2 = m2.filesnotin(ma)
                 u1 = sorted(addedinm1 - addedinm2)
                 u2 = sorted(addedinm2 - addedinm1)
                 if u1:
                     repo.ui.debug("  unmatched files in local:\n   %s\n"
                                   % "\n   ".join(u1))
                 if u2:
                     repo.ui.debug("  unmatched files in other:\n   %s\n"
                                   % "\n   ".join(u2))
                 return u1, u2
             def mergecopies(repo, c1, c2, ca):
                 """
                 Find moves and copies between context c1 and c2 that are relevant
                 for merging.
                 Returns four dicts: "copy", "movewithdir", "diverge", and
                 "renamedelete".
                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.
                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.
                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.
                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return {}, {}, {}, {}
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return repo.dirstate.copies(), {}, {}, {}
                 limit = _findlimit(repo, c1.rev(), c2.rev())
                 if limit is None:
                     # no common ancestor, no copies
                     return {}, {}, {}, {}
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 ma = ca.manifest()
                 def makectx(f, n):
                     if len(n) != 20: # in a working context?
                         if c1.rev() is None:
                             return c1.filectx(f)
                         return c2.filectx(f)
                     return repo.filectx(f, fileid=n)
                 ctx = util.lrucachefunc(makectx)
                 copy = {}
                 movewithdir = {}
                 fullcopy = {}
                 diverge = {}
                 repo.ui.debug("  searching for copies back to rev %d\n" % limit)
                 u1, u2 = _computenonoverlap(repo, m1, m2, ma)
                 for f in u1:
                     checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)
                 for f in u2:
                     checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)
                 renamedelete = {}
                 renamedelete2 = set()
                 diverge2 = set()
                 for of, fl in diverge.items():
                     if len(fl) == 1 or of in c1 or of in c2:
                         del diverge[of] # not actually divergent, or not a rename
                         if of not in c1 and of not in c2:
                             # renamed on one side, deleted on the other side, but filter
                             # out files that have been renamed and then deleted
                             renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                             renamedelete2.update(fl) # reverse map for below
                     else:
                         diverge2.update(fl) # reverse map for below
-                bothnew = sorted([d for d in m1 if d in m2 and d not in ma])
+                addedinm1 = m1.filesnotin(ma)
+                addedinm2 = m2.filesnotin(ma)
+                bothnew = sorted(addedinm1 & addedinm2)
                 if bothnew:
                     repo.ui.debug("  unmatched files new in both:\n   %s\n"
                                   % "\n   ".join(bothnew))
                 bothdiverge, _copy, _fullcopy = {}, {}, {}
                 for f in bothnew:
                     checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
                     checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
                 for of, fl in bothdiverge.items():
                     if len(fl) == 2 and fl[0] == fl[1]:
                         copy[fl[0]] = of # not actually divergent, just matching renames
                 if fullcopy and repo.ui.debugflag:
                     repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                                   "% = renamed and deleted):\n")
                     for f in sorted(fullcopy):
                         note = ""
                         if f in copy:
                             note += "*"
                         if f in diverge2:
                             note += "!"
                         if f in renamedelete2:
                             note += "%"
                         repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                                           note))
                 del diverge2
                 if not fullcopy:
                     return copy, movewithdir, diverge, renamedelete
                 repo.ui.debug("  checking for directory renames\n")
                 # generate a directory move map
                 d1, d2 = c1.dirs(), c2.dirs()
                 d1.addpath('/')
                 d2.addpath('/')
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in fullcopy.iteritems():
                     dsrc, ddst = _dirname(src), _dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif dsrc in d1 and ddst in d1:
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc)
                     elif dsrc in d2 and ddst in d2:
                         # directory wasn't entirely moved remotely
                         invalid.add(dsrc)
                     elif dsrc in dirmove and dirmove[dsrc] != ddst:
                         # files from the same directory moved to two different places
                         invalid.add(dsrc)
                     else:
                         # looks good so far
                         dirmove[dsrc + "/"] = ddst + "/"
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del d1, d2, invalid
                 if not dirmove:
                     return copy, movewithdir, diverge, renamedelete
                 for d in dirmove:
                     repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                                   (d, dirmove[d]))
                 # check unaccounted nonoverlapping files against directory moves
                 for f in u1 + u2:
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d):]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(("   pending file src: '%s' -> "
                                                    "dst: '%s'\n") % (f, df))
                                 break
                 return copy, movewithdir, diverge, renamedelete
             def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
                 """
                 check possible copies of f from m1 to m2
                 ctx = function accepting (filename, node) that returns a filectx.
                 f = the filename to check
                 m1 = the source manifest
                 m2 = the destination manifest
                 ca = the changectx of the common ancestor
                 limit = the rev number to not search beyond
                 diverge = record all diverges in this dict
                 copy = record all non-divergent copies in this dict
                 fullcopy = record all copies in this dict
                 """
                 ma = ca.manifest()
                 def _related(f1, f2, limit):
                     # Walk back to common ancestor to see if the two files originate
                     # from the same file. Since workingfilectx's rev() is None it messes
                     # up the integer comparison logic, hence the pre-step check for
                     # None (f1 and f2 can only be workingfilectx's initially).
                     if f1 == f2:
                         return f1 # a match
                     g1, g2 = f1.ancestors(), f2.ancestors()
                     try:
                         f1r, f2r = f1.rev(), f2.rev()
                         if f1r is None:
                             f1 = g1.next()
                         if f2r is None:
                             f2 = g2.next()
                         while True:
                             f1r, f2r = f1.rev(), f2.rev()
                             if f1r > f2r:
                                 f1 = g1.next()
                             elif f2r > f1r:
                                 f2 = g2.next()
                             elif f1 == f2:
                                 return f1 # a match
                             elif f1r == f2r or f1r < limit or f2r < limit:
                                 return False # copy no longer relevant
                     except StopIteration:
                         return False
                 of = None
                 seen = set([f])
                 for oc in ctx(f, m1[f]).ancestors():
                     ocr = oc.rev()
                     of = oc.path()
                     if of in seen:
                         # check limit late - grab last rename before
                         if ocr < limit:
                             break
                         continue
                     seen.add(of)
                     fullcopy[f] = of # remember for dir rename detection
                     if of not in m2:
                         continue # no match, keep looking
                     if m2[of] == ma.get(of):
                         break # no merge needed, quit early
                     c2 = ctx(of, m2[of])
                     cr = _related(oc, c2, ca.rev())
                     if cr and (of == f or of == c2.path()): # non-divergent
                         copy[f] = of
                         of = None
                         break
                 if of in ma:
                     diverge.setdefault(of, []).append(f)
             def duplicatecopies(repo, rev, fromrev, skiprev=None):
                 '''reproduce copies from fromrev to rev in the dirstate
                 If skiprev is specified, it's a revision that should be used to
                 filter copy records. Any copies that occur between fromrev and
                 skiprev will not be duplicated, even if they appear in the set of
                 copies between fromrev and rev.
                 '''
                 exclude = {}
                 if skiprev is not None:
                     exclude = pathcopies(repo[fromrev], repo[skiprev])
                 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
                     # copies.pathcopies returns backward renames, so dst might not
                     # actually be in the dirstate
                     if dst in exclude:
                         continue
                     if repo.dirstate[dst] in "nma":
                         repo.dirstate.copy(src, dst)