upstream/mercurial-mirror Commit - r24185:3a3806fe

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

import util

8

import util

9

import heapq

9

import heapq

10

11

def _nonoverlap(d1, d2, d3):

12

"Return list of elements in d1 not in d2 or d3"

13

return sorted([d for d in d1 if d not in d3 and d not in d2])

14

15

def _dirname(f):

11

def _dirname(f):

16

s = f.rfind("/")

12

s = f.rfind("/")

17

if s == -1:

13

if s == -1:

18

return ""

14

return ""

19

return f[:s]

15

return f[:s]

20

16

21

def _findlimit(repo, a, b):

17

def _findlimit(repo, a, b):

22

"""

18

"""

23

Find the last revision that needs to be checked to ensure that a full

19

Find the last revision that needs to be checked to ensure that a full

24

transitive closure for file copies can be properly calculated.

20

transitive closure for file copies can be properly calculated.

25

Generally, this means finding the earliest revision number that's an

21

Generally, this means finding the earliest revision number that's an

26

ancestor of a or b but not both, except when a or b is a direct descendent

22

ancestor of a or b but not both, except when a or b is a direct descendent

27

of the other, in which case we can return the minimum revnum of a and b.

23

of the other, in which case we can return the minimum revnum of a and b.

28

None if no such revision exists.

24

None if no such revision exists.

29

"""

25

"""

30

26

31

# basic idea:

27

# basic idea:

32

# - mark a and b with different sides

28

# - mark a and b with different sides

33

# - if a parent's children are all on the same side, the parent is

29

# - if a parent's children are all on the same side, the parent is

34

# on that side, otherwise it is on no side

30

# on that side, otherwise it is on no side

35

# - walk the graph in topological order with the help of a heap;

31

# - walk the graph in topological order with the help of a heap;

36

# - add unseen parents to side map

32

# - add unseen parents to side map

37

# - clear side of any parent that has children on different sides

33

# - clear side of any parent that has children on different sides

38

# - track number of interesting revs that might still be on a side

34

# - track number of interesting revs that might still be on a side

39

# - track the lowest interesting rev seen

35

# - track the lowest interesting rev seen

40

# - quit when interesting revs is zero

36

# - quit when interesting revs is zero

41

37

42

cl = repo.changelog

38

cl = repo.changelog

43

working = len(cl) # pseudo rev for the working directory

39

working = len(cl) # pseudo rev for the working directory

44

if a is None:

40

if a is None:

45

a = working

41

a = working

46

if b is None:

42

if b is None:

47

b = working

43

b = working

48

44

49

side = {a: -1, b: 1}

45

side = {a: -1, b: 1}

50

visit = [-a, -b]

46

visit = [-a, -b]

51

heapq.heapify(visit)

47

heapq.heapify(visit)

52

interesting = len(visit)

48

interesting = len(visit)

53

hascommonancestor = False

49

hascommonancestor = False

54

limit = working

50

limit = working

55

51

56

while interesting:

52

while interesting:

57

r = -heapq.heappop(visit)

53

r = -heapq.heappop(visit)

58

if r == working:

54

if r == working:

59

parents = [cl.rev(p) for p in repo.dirstate.parents()]

55

parents = [cl.rev(p) for p in repo.dirstate.parents()]

60

else:

56

else:

61

parents = cl.parentrevs(r)

57

parents = cl.parentrevs(r)

62

for p in parents:

58

for p in parents:

63

if p < 0:

59

if p < 0:

64

continue

60

continue

65

if p not in side:

61

if p not in side:

66

# first time we see p; add it to visit

62

# first time we see p; add it to visit

67

side[p] = side[r]

63

side[p] = side[r]

68

if side[p]:

64

if side[p]:

69

interesting += 1

65

interesting += 1

70

heapq.heappush(visit, -p)

66

heapq.heappush(visit, -p)

71

elif side[p] and side[p] != side[r]:

67

elif side[p] and side[p] != side[r]:

72

# p was interesting but now we know better

68

# p was interesting but now we know better

73

side[p] = 0

69

side[p] = 0

74

interesting -= 1

70

interesting -= 1

75

hascommonancestor = True

71

hascommonancestor = True

76

if side[r]:

72

if side[r]:

77

limit = r # lowest rev visited

73

limit = r # lowest rev visited

78

interesting -= 1

74

interesting -= 1

79

75

80

if not hascommonancestor:

76

if not hascommonancestor:

81

return None

77

return None

82

78

83

# Consider the following flow (see test-commit-amend.t under issue4405):

79

# Consider the following flow (see test-commit-amend.t under issue4405):

84

# 1/ File 'a0' committed

80

# 1/ File 'a0' committed

85

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

81

# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')

86

# 3/ Move back to first commit

82

# 3/ Move back to first commit

87

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

83

# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')

88

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

84

# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'

89

#

85

#

90

# During the amend in step five, we will be in this state:

86

# During the amend in step five, we will be in this state:

91

#

87

#

92

# @ 3 temporary amend commit for a1-amend

88

# @ 3 temporary amend commit for a1-amend

93

# |

89

# |

94

# o 2 a1-amend

90

# o 2 a1-amend

95

# |

91

# |

96

# | o 1 a1

92

# | o 1 a1

97

# |/

93

# |/

98

# o 0 a0

94

# o 0 a0

99

#

95

#

100

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

96

# When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,

101

# yet the filelog has the copy information in rev 1 and we will not look

97

# yet the filelog has the copy information in rev 1 and we will not look

102

# back far enough unless we also look at the a and b as candidates.

98

# back far enough unless we also look at the a and b as candidates.

103

# This only occurs when a is a descendent of b or visa-versa.

99

# This only occurs when a is a descendent of b or visa-versa.

104

return min(limit, a, b)

100

return min(limit, a, b)

105

101

106

def _chain(src, dst, a, b):

102

def _chain(src, dst, a, b):

107

'''chain two sets of copies a->b'''

103

'''chain two sets of copies a->b'''

108

t = a.copy()

104

t = a.copy()

109

for k, v in b.iteritems():

105

for k, v in b.iteritems():

110

if v in t:

106

if v in t:

111

# found a chain

107

# found a chain

112

if t[v] != k:

108

if t[v] != k:

113

# file wasn't renamed back to itself

109

# file wasn't renamed back to itself

114

t[k] = t[v]

110

t[k] = t[v]

115

if v not in dst:

111

if v not in dst:

116

# chain was a rename, not a copy

112

# chain was a rename, not a copy

117

del t[v]

113

del t[v]

118

if v in src:

114

if v in src:

119

# file is a copy of an existing file

115

# file is a copy of an existing file

120

t[k] = v

116

t[k] = v

121

117

122

# remove criss-crossed copies

118

# remove criss-crossed copies

123

for k, v in t.items():

119

for k, v in t.items():

124

if k in src and v in dst:

120

if k in src and v in dst:

125

del t[k]

121

del t[k]

126

122

127

return t

123

return t

128

124

129

def _tracefile(fctx, am, limit=-1):

125

def _tracefile(fctx, am, limit=-1):

130

'''return file context that is the ancestor of fctx present in ancestor

126

'''return file context that is the ancestor of fctx present in ancestor

131

manifest am, stopping after the first ancestor lower than limit'''

127

manifest am, stopping after the first ancestor lower than limit'''

132

128

133

for f in fctx.ancestors():

129

for f in fctx.ancestors():

134

if am.get(f.path(), None) == f.filenode():

130

if am.get(f.path(), None) == f.filenode():

135

return f

131

return f

136

if limit >= 0 and f.linkrev() < limit and f.rev() < limit:

132

if limit >= 0 and f.linkrev() < limit and f.rev() < limit:

137

return None

133

return None

138

134

139

def _dirstatecopies(d):

135

def _dirstatecopies(d):

140

ds = d._repo.dirstate

136

ds = d._repo.dirstate

141

c = ds.copies().copy()

137

c = ds.copies().copy()

142

for k in c.keys():

138

for k in c.keys():

143

if ds[k] not in 'anm':

139

if ds[k] not in 'anm':

144

del c[k]

140

del c[k]

145

return c

141

return c

146

142

147

def _computeforwardmissing(a, b):

143

def _computeforwardmissing(a, b):

148

"""Computes which files are in b but not a.

144

"""Computes which files are in b but not a.

149

This is its own function so extensions can easily wrap this call to see what

145

This is its own function so extensions can easily wrap this call to see what

150

files _forwardcopies is about to process.

146

files _forwardcopies is about to process.

151

"""

147

"""

152

return b.manifest().filesnotin(a.manifest())

148

return b.manifest().filesnotin(a.manifest())

153

149

154

def _forwardcopies(a, b):

150

def _forwardcopies(a, b):

155

'''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

151

'''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

156

152

157

# check for working copy

153

# check for working copy

158

w = None

154

w = None

159

if b.rev() is None:

155

if b.rev() is None:

160

w = b

156

w = b

161

b = w.p1()

157

b = w.p1()

162

if a == b:

158

if a == b:

163

# short-circuit to avoid issues with merge states

159

# short-circuit to avoid issues with merge states

164

return _dirstatecopies(w)

160

return _dirstatecopies(w)

165

161

166

# files might have to be traced back to the fctx parent of the last

162

# files might have to be traced back to the fctx parent of the last

167

# one-side-only changeset, but not further back than that

163

# one-side-only changeset, but not further back than that

168

limit = _findlimit(a._repo, a.rev(), b.rev())

164

limit = _findlimit(a._repo, a.rev(), b.rev())

169

if limit is None:

165

if limit is None:

170

limit = -1

166

limit = -1

171

am = a.manifest()

167

am = a.manifest()

172

168

173

# find where new files came from

169

# find where new files came from

174

# we currently don't try to find where old files went, too expensive

170

# we currently don't try to find where old files went, too expensive

175

# this means we can miss a case like 'hg rm b; hg cp a b'

171

# this means we can miss a case like 'hg rm b; hg cp a b'

176

cm = {}

172

cm = {}

177

missing = _computeforwardmissing(a, b)

173

missing = _computeforwardmissing(a, b)

178

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

174

ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

179

for f in missing:

175

for f in missing:

180

fctx = b[f]

176

fctx = b[f]

181

fctx._ancestrycontext = ancestrycontext

177

fctx._ancestrycontext = ancestrycontext

182

ofctx = _tracefile(fctx, am, limit)

178

ofctx = _tracefile(fctx, am, limit)

183

if ofctx:

179

if ofctx:

184

cm[f] = ofctx.path()

180

cm[f] = ofctx.path()

185

181

186

# combine copies from dirstate if necessary

182

# combine copies from dirstate if necessary

187

if w is not None:

183

if w is not None:

188

cm = _chain(a, w, cm, _dirstatecopies(w))

184

cm = _chain(a, w, cm, _dirstatecopies(w))

189

185

190

return cm

186

return cm

191

187

192

def _backwardrenames(a, b):

188

def _backwardrenames(a, b):

193

# Even though we're not taking copies into account, 1:n rename situations

189

# Even though we're not taking copies into account, 1:n rename situations

194

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

190

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

195

# arbitrarily pick one of the renames.

191

# arbitrarily pick one of the renames.

196

f = _forwardcopies(b, a)

192

f = _forwardcopies(b, a)

197

r = {}

193

r = {}

198

for k, v in sorted(f.iteritems()):

194

for k, v in sorted(f.iteritems()):

199

# remove copies

195

# remove copies

200

if v in a:

196

if v in a:

201

continue

197

continue

202

r[v] = k

198

r[v] = k

203

return r

199

return r

204

200

205

def pathcopies(x, y):

201

def pathcopies(x, y):

206

'''find {dst@y: src@x} copy mapping for directed compare'''

202

'''find {dst@y: src@x} copy mapping for directed compare'''

207

if x == y or not x or not y:

203

if x == y or not x or not y:

208

return {}

204

return {}

209

a = y.ancestor(x)

205

a = y.ancestor(x)

210

if a == x:

206

if a == x:

211

return _forwardcopies(x, y)

207

return _forwardcopies(x, y)

212

if a == y:

208

if a == y:

213

return _backwardrenames(x, y)

209

return _backwardrenames(x, y)

214

return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))

210

return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))

215

211

216

def _computenonoverlap(repo, m1, m2, ma):

212

def _computenonoverlap(repo, m1, m2, ma):

217

"""Computes the files exclusive to m1 and m2.

213

"""Computes the files exclusive to m1 and m2.

218

This is its own function so extensions can easily wrap this call to see what

214

This is its own function so extensions can easily wrap this call to see what

219

files mergecopies is about to process.

215

files mergecopies is about to process.

220

"""

216

"""

221

u1 = _nonoverlap(m1, m2, ma)

217

addedinm1 = m1.filesnotin(ma)

222

u2 = _nonoverlap(m2, m1, ma)

218

addedinm2 = m2.filesnotin(ma)

219

u1 = sorted(addedinm1 - addedinm2)

220

u2 = sorted(addedinm2 - addedinm1)

223

221

224

if u1:

222

if u1:

225

repo.ui.debug(" unmatched files in local:\n %s\n"

223

repo.ui.debug(" unmatched files in local:\n %s\n"

226

% "\n ".join(u1))

224

% "\n ".join(u1))

227

if u2:

225

if u2:

228

repo.ui.debug(" unmatched files in other:\n %s\n"

226

repo.ui.debug(" unmatched files in other:\n %s\n"

229

% "\n ".join(u2))

227

% "\n ".join(u2))

230

return u1, u2

228

return u1, u2

231

229

232

def mergecopies(repo, c1, c2, ca):

230

def mergecopies(repo, c1, c2, ca):

233

"""

231

"""

234

Find moves and copies between context c1 and c2 that are relevant

232

Find moves and copies between context c1 and c2 that are relevant

235

for merging.

233

for merging.

236

234

237

Returns four dicts: "copy", "movewithdir", "diverge", and

235

Returns four dicts: "copy", "movewithdir", "diverge", and

238

"renamedelete".

236

"renamedelete".

239

237

240

"copy" is a mapping from destination name -> source name,

238

"copy" is a mapping from destination name -> source name,

241

where source is in c1 and destination is in c2 or vice-versa.

239

where source is in c1 and destination is in c2 or vice-versa.

242

240

243

"movewithdir" is a mapping from source name -> destination name,

241

"movewithdir" is a mapping from source name -> destination name,

244

where the file at source present in one context but not the other

242

where the file at source present in one context but not the other

245

needs to be moved to destination by the merge process, because the

243

needs to be moved to destination by the merge process, because the

246

other context moved the directory it is in.

244

other context moved the directory it is in.

247

245

248

"diverge" is a mapping of source name -> list of destination names

246

"diverge" is a mapping of source name -> list of destination names

249

for divergent renames.

247

for divergent renames.

250

248

251

"renamedelete" is a mapping of source name -> list of destination

249

"renamedelete" is a mapping of source name -> list of destination

252

names for files deleted in c1 that were renamed in c2 or vice-versa.

250

names for files deleted in c1 that were renamed in c2 or vice-versa.

253

"""

251

"""

254

# avoid silly behavior for update from empty dir

252

# avoid silly behavior for update from empty dir

255

if not c1 or not c2 or c1 == c2:

253

if not c1 or not c2 or c1 == c2:

256

return {}, {}, {}, {}

254

return {}, {}, {}, {}

257

255

258

# avoid silly behavior for parent -> working dir

256

# avoid silly behavior for parent -> working dir

259

if c2.node() is None and c1.node() == repo.dirstate.p1():

257

if c2.node() is None and c1.node() == repo.dirstate.p1():

260

return repo.dirstate.copies(), {}, {}, {}

258

return repo.dirstate.copies(), {}, {}, {}

261

259

262

limit = _findlimit(repo, c1.rev(), c2.rev())

260

limit = _findlimit(repo, c1.rev(), c2.rev())

263

if limit is None:

261

if limit is None:

264

# no common ancestor, no copies

262

# no common ancestor, no copies

265

return {}, {}, {}, {}

263

return {}, {}, {}, {}

266

m1 = c1.manifest()

264

m1 = c1.manifest()

267

m2 = c2.manifest()

265

m2 = c2.manifest()

268

ma = ca.manifest()

266

ma = ca.manifest()

269

267

270

def makectx(f, n):

268

def makectx(f, n):

271

if len(n) != 20: # in a working context?

269

if len(n) != 20: # in a working context?

272

if c1.rev() is None:

270

if c1.rev() is None:

273

return c1.filectx(f)

271

return c1.filectx(f)

274

return c2.filectx(f)

272

return c2.filectx(f)

275

return repo.filectx(f, fileid=n)

273

return repo.filectx(f, fileid=n)

276

274

277

ctx = util.lrucachefunc(makectx)

275

ctx = util.lrucachefunc(makectx)

278

copy = {}

276

copy = {}

279

movewithdir = {}

277

movewithdir = {}

280

fullcopy = {}

278

fullcopy = {}

281

diverge = {}

279

diverge = {}

282

280

283

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

281

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

284

282

285

u1, u2 = _computenonoverlap(repo, m1, m2, ma)

283

u1, u2 = _computenonoverlap(repo, m1, m2, ma)

286

284

287

for f in u1:

285

for f in u1:

288

checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)

286

checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)

289

287

290

for f in u2:

288

for f in u2:

291

checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)

289

checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)

292

290

293

renamedelete = {}

291

renamedelete = {}

294

renamedelete2 = set()

292

renamedelete2 = set()

295

diverge2 = set()

293

diverge2 = set()

296

for of, fl in diverge.items():

294

for of, fl in diverge.items():

297

if len(fl) == 1 or of in c1 or of in c2:

295

if len(fl) == 1 or of in c1 or of in c2:

298

del diverge[of] # not actually divergent, or not a rename

296

del diverge[of] # not actually divergent, or not a rename

299

if of not in c1 and of not in c2:

297

if of not in c1 and of not in c2:

300

# renamed on one side, deleted on the other side, but filter

298

# renamed on one side, deleted on the other side, but filter

301

# out files that have been renamed and then deleted

299

# out files that have been renamed and then deleted

302

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

300

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

303

renamedelete2.update(fl) # reverse map for below

301

renamedelete2.update(fl) # reverse map for below

304

else:

302

else:

305

diverge2.update(fl) # reverse map for below

303

diverge2.update(fl) # reverse map for below

306

304

307

bothnew = sorted([d for d in m1 if d in m2 and d not in ma])

305

bothnew = sorted([d for d in m1 if d in m2 and d not in ma])

308

if bothnew:

306

if bothnew:

309

repo.ui.debug(" unmatched files new in both:\n %s\n"

307

repo.ui.debug(" unmatched files new in both:\n %s\n"

310

% "\n ".join(bothnew))

308

% "\n ".join(bothnew))

311

bothdiverge, _copy, _fullcopy = {}, {}, {}

309

bothdiverge, _copy, _fullcopy = {}, {}, {}

312

for f in bothnew:

310

for f in bothnew:

313

checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)

311

checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)

314

checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)

312

checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)

315

for of, fl in bothdiverge.items():

313

for of, fl in bothdiverge.items():

316

if len(fl) == 2 and fl[0] == fl[1]:

314

if len(fl) == 2 and fl[0] == fl[1]:

317

copy[fl[0]] = of # not actually divergent, just matching renames

315

copy[fl[0]] = of # not actually divergent, just matching renames

318

316

319

if fullcopy and repo.ui.debugflag:

317

if fullcopy and repo.ui.debugflag:

320

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

318

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

321

"% = renamed and deleted):\n")

319

"% = renamed and deleted):\n")

322

for f in sorted(fullcopy):

320

for f in sorted(fullcopy):

323

note = ""

321

note = ""

324

if f in copy:

322

if f in copy:

325

note += "*"

323

note += "*"

326

if f in diverge2:

324

if f in diverge2:

327

note += "!"

325

note += "!"

328

if f in renamedelete2:

326

if f in renamedelete2:

329

note += "%"

327

note += "%"

330

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

328

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

331

note))

329

note))

332

del diverge2

330

del diverge2

333

331

334

if not fullcopy:

332

if not fullcopy:

335

return copy, movewithdir, diverge, renamedelete

333

return copy, movewithdir, diverge, renamedelete

336

334

337

repo.ui.debug(" checking for directory renames\n")

335

repo.ui.debug(" checking for directory renames\n")

338

336

339

# generate a directory move map

337

# generate a directory move map

340

d1, d2 = c1.dirs(), c2.dirs()

338

d1, d2 = c1.dirs(), c2.dirs()

341

d1.addpath('/')

339

d1.addpath('/')

342

d2.addpath('/')

340

d2.addpath('/')

343

invalid = set()

341

invalid = set()

344

dirmove = {}

342

dirmove = {}

345

343

346

# examine each file copy for a potential directory move, which is

344

# examine each file copy for a potential directory move, which is

347

# when all the files in a directory are moved to a new directory

345

# when all the files in a directory are moved to a new directory

348

for dst, src in fullcopy.iteritems():

346

for dst, src in fullcopy.iteritems():

349

dsrc, ddst = _dirname(src), _dirname(dst)

347

dsrc, ddst = _dirname(src), _dirname(dst)

350

if dsrc in invalid:

348

if dsrc in invalid:

351

# already seen to be uninteresting

349

# already seen to be uninteresting

352

continue

350

continue

353

elif dsrc in d1 and ddst in d1:

351

elif dsrc in d1 and ddst in d1:

354

# directory wasn't entirely moved locally

352

# directory wasn't entirely moved locally

355

invalid.add(dsrc)

353

invalid.add(dsrc)

356

elif dsrc in d2 and ddst in d2:

354

elif dsrc in d2 and ddst in d2:

357

# directory wasn't entirely moved remotely

355

# directory wasn't entirely moved remotely

358

invalid.add(dsrc)

356

invalid.add(dsrc)

359

elif dsrc in dirmove and dirmove[dsrc] != ddst:

357

elif dsrc in dirmove and dirmove[dsrc] != ddst:

360

# files from the same directory moved to two different places

358

# files from the same directory moved to two different places

361

invalid.add(dsrc)

359

invalid.add(dsrc)

362

else:

360

else:

363

# looks good so far

361

# looks good so far

364

dirmove[dsrc + "/"] = ddst + "/"

362

dirmove[dsrc + "/"] = ddst + "/"

365

363

366

for i in invalid:

364

for i in invalid:

367

if i in dirmove:

365

if i in dirmove:

368

del dirmove[i]

366

del dirmove[i]

369

del d1, d2, invalid

367

del d1, d2, invalid

370

368

371

if not dirmove:

369

if not dirmove:

372

return copy, movewithdir, diverge, renamedelete

370

return copy, movewithdir, diverge, renamedelete

373

371

374

for d in dirmove:

372

for d in dirmove:

375

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

373

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

376

(d, dirmove[d]))

374

(d, dirmove[d]))

377

375

378

# check unaccounted nonoverlapping files against directory moves

376

# check unaccounted nonoverlapping files against directory moves

379

for f in u1 + u2:

377

for f in u1 + u2:

380

if f not in fullcopy:

378

if f not in fullcopy:

381

for d in dirmove:

379

for d in dirmove:

382

if f.startswith(d):

380

if f.startswith(d):

383

# new file added in a directory that was moved, move it

381

# new file added in a directory that was moved, move it

384

df = dirmove[d] + f[len(d):]

382

df = dirmove[d] + f[len(d):]

385

if df not in copy:

383

if df not in copy:

386

movewithdir[f] = df

384

movewithdir[f] = df

387

repo.ui.debug((" pending file src: '%s' -> "

385

repo.ui.debug((" pending file src: '%s' -> "

388

"dst: '%s'\n") % (f, df))

386

"dst: '%s'\n") % (f, df))

389

break

387

break

390

388

391

return copy, movewithdir, diverge, renamedelete

389

return copy, movewithdir, diverge, renamedelete

392

390

393

def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):

391

def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):

394

"""

392

"""

395

check possible copies of f from m1 to m2

393

check possible copies of f from m1 to m2

396

394

397

ctx = function accepting (filename, node) that returns a filectx.

395

ctx = function accepting (filename, node) that returns a filectx.

398

f = the filename to check

396

f = the filename to check

399

m1 = the source manifest

397

m1 = the source manifest

400

m2 = the destination manifest

398

m2 = the destination manifest

401

ca = the changectx of the common ancestor

399

ca = the changectx of the common ancestor

402

limit = the rev number to not search beyond

400

limit = the rev number to not search beyond

403

diverge = record all diverges in this dict

401

diverge = record all diverges in this dict

404

copy = record all non-divergent copies in this dict

402

copy = record all non-divergent copies in this dict

405

fullcopy = record all copies in this dict

403

fullcopy = record all copies in this dict

406

"""

404

"""

407

405

408

ma = ca.manifest()

406

ma = ca.manifest()

409

407

410

def _related(f1, f2, limit):

408

def _related(f1, f2, limit):

411

# Walk back to common ancestor to see if the two files originate

409

# Walk back to common ancestor to see if the two files originate

412

# from the same file. Since workingfilectx's rev() is None it messes

410

# from the same file. Since workingfilectx's rev() is None it messes

413

# up the integer comparison logic, hence the pre-step check for

411

# up the integer comparison logic, hence the pre-step check for

414

# None (f1 and f2 can only be workingfilectx's initially).

412

# None (f1 and f2 can only be workingfilectx's initially).

415

413

416

if f1 == f2:

414

if f1 == f2:

417

return f1 # a match

415

return f1 # a match

418

416

419

g1, g2 = f1.ancestors(), f2.ancestors()

417

g1, g2 = f1.ancestors(), f2.ancestors()

420

try:

418

try:

421

f1r, f2r = f1.rev(), f2.rev()

419

f1r, f2r = f1.rev(), f2.rev()

422

420

423

if f1r is None:

421

if f1r is None:

424

f1 = g1.next()

422

f1 = g1.next()

425

if f2r is None:

423

if f2r is None:

426

f2 = g2.next()

424

f2 = g2.next()

427

425

428

while True:

426

while True:

429

f1r, f2r = f1.rev(), f2.rev()

427

f1r, f2r = f1.rev(), f2.rev()

430

if f1r > f2r:

428

if f1r > f2r:

431

f1 = g1.next()

429

f1 = g1.next()

432

elif f2r > f1r:

430

elif f2r > f1r:

433

f2 = g2.next()

431

f2 = g2.next()

434

elif f1 == f2:

432

elif f1 == f2:

435

return f1 # a match

433

return f1 # a match

436

elif f1r == f2r or f1r < limit or f2r < limit:

434

elif f1r == f2r or f1r < limit or f2r < limit:

437

return False # copy no longer relevant

435

return False # copy no longer relevant

438

except StopIteration:

436

except StopIteration:

439

return False

437

return False

440

438

441

of = None

439

of = None

442

seen = set([f])

440

seen = set([f])

443

for oc in ctx(f, m1[f]).ancestors():

441

for oc in ctx(f, m1[f]).ancestors():

444

ocr = oc.rev()

442

ocr = oc.rev()

445

of = oc.path()

443

of = oc.path()

446

if of in seen:

444

if of in seen:

447

# check limit late - grab last rename before

445

# check limit late - grab last rename before

448

if ocr < limit:

446

if ocr < limit:

449

break

447

break

450

continue

448

continue

451

seen.add(of)

449

seen.add(of)

452

450

453

fullcopy[f] = of # remember for dir rename detection

451

fullcopy[f] = of # remember for dir rename detection

454

if of not in m2:

452

if of not in m2:

455

continue # no match, keep looking

453

continue # no match, keep looking

456

if m2[of] == ma.get(of):

454

if m2[of] == ma.get(of):

457

break # no merge needed, quit early

455

break # no merge needed, quit early

458

c2 = ctx(of, m2[of])

456

c2 = ctx(of, m2[of])

459

cr = _related(oc, c2, ca.rev())

457

cr = _related(oc, c2, ca.rev())

460

if cr and (of == f or of == c2.path()): # non-divergent

458

if cr and (of == f or of == c2.path()): # non-divergent

461

copy[f] = of

459

copy[f] = of

462

of = None

460

of = None

463

break

461

break

464

462

465

if of in ma:

463

if of in ma:

466

diverge.setdefault(of, []).append(f)

464

diverge.setdefault(of, []).append(f)

467

465

468

def duplicatecopies(repo, rev, fromrev, skiprev=None):

466

def duplicatecopies(repo, rev, fromrev, skiprev=None):

469

'''reproduce copies from fromrev to rev in the dirstate

467

'''reproduce copies from fromrev to rev in the dirstate

470

468

471

If skiprev is specified, it's a revision that should be used to

469

If skiprev is specified, it's a revision that should be used to

472

filter copy records. Any copies that occur between fromrev and

470

filter copy records. Any copies that occur between fromrev and

473

skiprev will not be duplicated, even if they appear in the set of

471

skiprev will not be duplicated, even if they appear in the set of

474

copies between fromrev and rev.

472

copies between fromrev and rev.

475

'''

473

'''

476

exclude = {}

474

exclude = {}

477

if skiprev is not None:

475

if skiprev is not None:

478

exclude = pathcopies(repo[fromrev], repo[skiprev])

476

exclude = pathcopies(repo[fromrev], repo[skiprev])

479

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

477

for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():

480

# copies.pathcopies returns backward renames, so dst might not

478

# copies.pathcopies returns backward renames, so dst might not

481

# actually be in the dirstate

479

# actually be in the dirstate

482

if dst in exclude:

480

if dst in exclude:

483

continue

481

continue

484

if repo.dirstate[dst] in "nma":

482

if repo.dirstate[dst] in "nma":

485

repo.dirstate.copy(src, dst)

483

repo.dirstate.copy(src, dst)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             import util
             import heapq
-            def _nonoverlap(d1, d2, d3):
-                "Return list of elements in d1 not in d2 or d3"
-                return sorted([d for d in d1 if d not in d3 and d not in d2])
             def _dirname(f):
                 s = f.rfind("/")
                 if s == -1:
                     return ""
                 return f[:s]
             def _findlimit(repo, a, b):
                 """
                 Find the last revision that needs to be checked to ensure that a full
                 transitive closure for file copies can be properly calculated.
                 Generally, this means finding the earliest revision number that's an
                 ancestor of a or b but not both, except when a or b is a direct descendent
                 of the other, in which case we can return the minimum revnum of a and b.
                 None if no such revision exists.
                 """
                 # basic idea:
                 # - mark a and b with different sides
                 # - if a parent's children are all on the same side, the parent is
                 #   on that side, otherwise it is on no side
                 # - walk the graph in topological order with the help of a heap;
                 #   - add unseen parents to side map
                 #   - clear side of any parent that has children on different sides
                 #   - track number of interesting revs that might still be on a side
                 #   - track the lowest interesting rev seen
                 #   - quit when interesting revs is zero
                 cl = repo.changelog
                 working = len(cl) # pseudo rev for the working directory
                 if a is None:
                     a = working
                 if b is None:
                     b = working
                 side = {a: -1, b: 1}
                 visit = [-a, -b]
                 heapq.heapify(visit)
                 interesting = len(visit)
                 hascommonancestor = False
                 limit = working
                 while interesting:
                     r = -heapq.heappop(visit)
                     if r == working:
                         parents = [cl.rev(p) for p in repo.dirstate.parents()]
                     else:
                         parents = cl.parentrevs(r)
                     for p in parents:
                         if p < 0:
                             continue
                         if p not in side:
                             # first time we see p; add it to visit
                             side[p] = side[r]
                             if side[p]:
                                 interesting += 1
                             heapq.heappush(visit, -p)
                         elif side[p] and side[p] != side[r]:
                             # p was interesting but now we know better
                             side[p] = 0
                             interesting -= 1
                             hascommonancestor = True
                     if side[r]:
                         limit = r # lowest rev visited
                         interesting -= 1
                 if not hascommonancestor:
                     return None
                 # Consider the following flow (see test-commit-amend.t under issue4405):
                 # 1/ File 'a0' committed
                 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
                 # 3/ Move back to first commit
                 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
                 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
                 #
                 # During the amend in step five, we will be in this state:
                 #
                 # @  3 temporary amend commit for a1-amend
                 # |
                 # o  2 a1-amend
                 # |
                 # | o  1 a1
                 # |/
                 # o  0 a0
                 #
                 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
                 # yet the filelog has the copy information in rev 1 and we will not look
                 # back far enough unless we also look at the a and b as candidates.
                 # This only occurs when a is a descendent of b or visa-versa.
                 return min(limit, a, b)
             def _chain(src, dst, a, b):
                 '''chain two sets of copies a->b'''
                 t = a.copy()
                 for k, v in b.iteritems():
                     if v in t:
                         # found a chain
                         if t[v] != k:
                             # file wasn't renamed back to itself
                             t[k] = t[v]
                         if v not in dst:
                             # chain was a rename, not a copy
                             del t[v]
                     if v in src:
                         # file is a copy of an existing file
                         t[k] = v
                 # remove criss-crossed copies
                 for k, v in t.items():
                     if k in src and v in dst:
                         del t[k]
                 return t
             def _tracefile(fctx, am, limit=-1):
                 '''return file context that is the ancestor of fctx present in ancestor
                 manifest am, stopping after the first ancestor lower than limit'''
                 for f in fctx.ancestors():
                     if am.get(f.path(), None) == f.filenode():
                         return f
                     if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
                         return None
             def _dirstatecopies(d):
                 ds = d._repo.dirstate
                 c = ds.copies().copy()
                 for k in c.keys():
                     if ds[k] not in 'anm':
                         del c[k]
                 return c
             def _computeforwardmissing(a, b):
                 """Computes which files are in b but not a.
                 This is its own function so extensions can easily wrap this call to see what
                 files _forwardcopies is about to process.
                 """
                 return b.manifest().filesnotin(a.manifest())
             def _forwardcopies(a, b):
                 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
                 # check for working copy
                 w = None
                 if b.rev() is None:
                     w = b
                     b = w.p1()
                     if a == b:
                         # short-circuit to avoid issues with merge states
                         return _dirstatecopies(w)
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 limit = _findlimit(a._repo, a.rev(), b.rev())
                 if limit is None:
                     limit = -1
                 am = a.manifest()
                 # find where new files came from
                 # we currently don't try to find where old files went, too expensive
                 # this means we can miss a case like 'hg rm b; hg cp a b'
                 cm = {}
                 missing = _computeforwardmissing(a, b)
                 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
                 for f in missing:
                     fctx = b[f]
                     fctx._ancestrycontext = ancestrycontext
                     ofctx = _tracefile(fctx, am, limit)
                     if ofctx:
                         cm[f] = ofctx.path()
                 # combine copies from dirstate if necessary
                 if w is not None:
                     cm = _chain(a, w, cm, _dirstatecopies(w))
                 return cm
             def _backwardrenames(a, b):
                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames.
                 f = _forwardcopies(b, a)
                 r = {}
                 for k, v in sorted(f.iteritems()):
                     # remove copies
                     if v in a:
                         continue
                     r[v] = k
                 return r
             def pathcopies(x, y):
                 '''find {dst@y: src@x} copy mapping for directed compare'''
                 if x == y or not x or not y:
                     return {}
                 a = y.ancestor(x)
                 if a == x:
                     return _forwardcopies(x, y)
                 if a == y:
                     return _backwardrenames(x, y)
                 return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))
             def _computenonoverlap(repo, m1, m2, ma):
                 """Computes the files exclusive to m1 and m2.
                 This is its own function so extensions can easily wrap this call to see what
                 files mergecopies is about to process.
                 """
-                u1 = _nonoverlap(m1, m2, ma)
+                addedinm1 = m1.filesnotin(ma)
-                u2 = _nonoverlap(m2, m1, ma)
+                addedinm2 = m2.filesnotin(ma)
+                u1 = sorted(addedinm1 - addedinm2)
+                u2 = sorted(addedinm2 - addedinm1)
                 if u1:
                     repo.ui.debug("  unmatched files in local:\n   %s\n"
                                   % "\n   ".join(u1))
                 if u2:
                     repo.ui.debug("  unmatched files in other:\n   %s\n"
                                   % "\n   ".join(u2))
                 return u1, u2
             def mergecopies(repo, c1, c2, ca):
                 """
                 Find moves and copies between context c1 and c2 that are relevant
                 for merging.
                 Returns four dicts: "copy", "movewithdir", "diverge", and
                 "renamedelete".
                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.
                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.
                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.
                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return {}, {}, {}, {}
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return repo.dirstate.copies(), {}, {}, {}
                 limit = _findlimit(repo, c1.rev(), c2.rev())
                 if limit is None:
                     # no common ancestor, no copies
                     return {}, {}, {}, {}
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 ma = ca.manifest()
                 def makectx(f, n):
                     if len(n) != 20: # in a working context?
                         if c1.rev() is None:
                             return c1.filectx(f)
                         return c2.filectx(f)
                     return repo.filectx(f, fileid=n)
                 ctx = util.lrucachefunc(makectx)
                 copy = {}
                 movewithdir = {}
                 fullcopy = {}
                 diverge = {}
                 repo.ui.debug("  searching for copies back to rev %d\n" % limit)
                 u1, u2 = _computenonoverlap(repo, m1, m2, ma)
                 for f in u1:
                     checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)
                 for f in u2:
                     checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)
                 renamedelete = {}
                 renamedelete2 = set()
                 diverge2 = set()
                 for of, fl in diverge.items():
                     if len(fl) == 1 or of in c1 or of in c2:
                         del diverge[of] # not actually divergent, or not a rename
                         if of not in c1 and of not in c2:
                             # renamed on one side, deleted on the other side, but filter
                             # out files that have been renamed and then deleted
                             renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                             renamedelete2.update(fl) # reverse map for below
                     else:
                         diverge2.update(fl) # reverse map for below
                 bothnew = sorted([d for d in m1 if d in m2 and d not in ma])
                 if bothnew:
                     repo.ui.debug("  unmatched files new in both:\n   %s\n"
                                   % "\n   ".join(bothnew))
                 bothdiverge, _copy, _fullcopy = {}, {}, {}
                 for f in bothnew:
                     checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
                     checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
                 for of, fl in bothdiverge.items():
                     if len(fl) == 2 and fl[0] == fl[1]:
                         copy[fl[0]] = of # not actually divergent, just matching renames
                 if fullcopy and repo.ui.debugflag:
                     repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                                   "% = renamed and deleted):\n")
                     for f in sorted(fullcopy):
                         note = ""
                         if f in copy:
                             note += "*"
                         if f in diverge2:
                             note += "!"
                         if f in renamedelete2:
                             note += "%"
                         repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                                           note))
                 del diverge2
                 if not fullcopy:
                     return copy, movewithdir, diverge, renamedelete
                 repo.ui.debug("  checking for directory renames\n")
                 # generate a directory move map
                 d1, d2 = c1.dirs(), c2.dirs()
                 d1.addpath('/')
                 d2.addpath('/')
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in fullcopy.iteritems():
                     dsrc, ddst = _dirname(src), _dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif dsrc in d1 and ddst in d1:
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc)
                     elif dsrc in d2 and ddst in d2:
                         # directory wasn't entirely moved remotely
                         invalid.add(dsrc)
                     elif dsrc in dirmove and dirmove[dsrc] != ddst:
                         # files from the same directory moved to two different places
                         invalid.add(dsrc)
                     else:
                         # looks good so far
                         dirmove[dsrc + "/"] = ddst + "/"
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del d1, d2, invalid
                 if not dirmove:
                     return copy, movewithdir, diverge, renamedelete
                 for d in dirmove:
                     repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                                   (d, dirmove[d]))
                 # check unaccounted nonoverlapping files against directory moves
                 for f in u1 + u2:
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d):]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(("   pending file src: '%s' -> "
                                                    "dst: '%s'\n") % (f, df))
                                 break
                 return copy, movewithdir, diverge, renamedelete
             def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
                 """
                 check possible copies of f from m1 to m2
                 ctx = function accepting (filename, node) that returns a filectx.
                 f = the filename to check
                 m1 = the source manifest
                 m2 = the destination manifest
                 ca = the changectx of the common ancestor
                 limit = the rev number to not search beyond
                 diverge = record all diverges in this dict
                 copy = record all non-divergent copies in this dict
                 fullcopy = record all copies in this dict
                 """
                 ma = ca.manifest()
                 def _related(f1, f2, limit):
                     # Walk back to common ancestor to see if the two files originate
                     # from the same file. Since workingfilectx's rev() is None it messes
                     # up the integer comparison logic, hence the pre-step check for
                     # None (f1 and f2 can only be workingfilectx's initially).
                     if f1 == f2:
                         return f1 # a match
                     g1, g2 = f1.ancestors(), f2.ancestors()
                     try:
                         f1r, f2r = f1.rev(), f2.rev()
                         if f1r is None:
                             f1 = g1.next()
                         if f2r is None:
                             f2 = g2.next()
                         while True:
                             f1r, f2r = f1.rev(), f2.rev()
                             if f1r > f2r:
                                 f1 = g1.next()
                             elif f2r > f1r:
                                 f2 = g2.next()
                             elif f1 == f2:
                                 return f1 # a match
                             elif f1r == f2r or f1r < limit or f2r < limit:
                                 return False # copy no longer relevant
                     except StopIteration:
                         return False
                 of = None
                 seen = set([f])
                 for oc in ctx(f, m1[f]).ancestors():
                     ocr = oc.rev()
                     of = oc.path()
                     if of in seen:
                         # check limit late - grab last rename before
                         if ocr < limit:
                             break
                         continue
                     seen.add(of)
                     fullcopy[f] = of # remember for dir rename detection
                     if of not in m2:
                         continue # no match, keep looking
                     if m2[of] == ma.get(of):
                         break # no merge needed, quit early
                     c2 = ctx(of, m2[of])
                     cr = _related(oc, c2, ca.rev())
                     if cr and (of == f or of == c2.path()): # non-divergent
                         copy[f] = of
                         of = None
                         break
                 if of in ma:
                     diverge.setdefault(of, []).append(f)
             def duplicatecopies(repo, rev, fromrev, skiprev=None):
                 '''reproduce copies from fromrev to rev in the dirstate
                 If skiprev is specified, it's a revision that should be used to
                 filter copy records. Any copies that occur between fromrev and
                 skiprev will not be duplicated, even if they appear in the set of
                 copies between fromrev and rev.
                 '''
                 exclude = {}
                 if skiprev is not None:
                     exclude = pathcopies(repo[fromrev], repo[skiprev])
                 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
                     # copies.pathcopies returns backward renames, so dst might not
                     # actually be in the dirstate
                     if dst in exclude:
                         continue
                     if repo.dirstate[dst] in "nma":
                         repo.dirstate.copy(src, dst)