upstream/mercurial-mirror Commit - r24010:a63c2b15

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

import util

8

import util

9

import heapq

9

import heapq

10

11

def _nonoverlap(d1, d2, d3):
    """Return a sorted list of the elements of d1 found in neither d2 nor d3.

    d1 is iterated; d2 and d3 only need to support the ``in`` operator.
    """
    return sorted(d for d in d1 if d not in d3 and d not in d2)

14

15

def _dirname(f):
    """Return the part of path f before its last "/".

    Returns the empty string when f contains no "/" at all.
    """
    s = f.rfind("/")
    if s == -1:
        return ""
    return f[:s]

20

21

def _findlimit(repo, a, b):
    """Find the last revision that needs to be checked for copy tracing.

    This is the revision back to which file history has to be examined so
    that a full transitive closure for file copies can be properly
    calculated. Generally, this means finding the earliest revision number
    that's an ancestor of a or b but not both, except when a or b is a
    direct descendent of the other, in which case we can return the minimum
    revnum of a and b.

    repo -- repository object; its ``changelog`` and (for the working
            directory) ``dirstate`` are consulted
    a, b -- revision numbers; None stands for the working directory

    Returns the limit revision number, or None if a and b have no common
    ancestor.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    working = len(cl) # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    side = {a: -1, b: 1}
    # heapq is a min-heap; revs are stored negated so the highest rev
    # is popped first
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                # negative parent revs are skipped
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or vice-versa.
    return min(limit, a, b)

105

106

def _chain(src, dst, a, b):
    """Chain two sets of copies a->b into a single mapping.

    src, dst -- containers supporting ``in``, used to test whether a file
                name is present at the start and at the end of the chain
    a, b     -- copy mappings of the form {destination: source}; a covers
                the first step and b the second

    Returns a new dict; a itself is not modified.
    """
    t = a.copy()
    for k, v in b.items():
        if v in t:
            # found a chain
            if t[v] != k:
                # file wasn't renamed back to itself
                t[k] = t[v]
            if v not in dst:
                # chain was a rename, not a copy
                del t[v]
        if v in src:
            # file is a copy of an existing file
            t[k] = v

    # remove criss-crossed copies; iterate over a snapshot because
    # entries are deleted from t inside the loop
    for k, v in list(t.items()):
        if k in src and v in dst:
            del t[k]

    return t

128

129

def _tracefile(fctx, am, limit=-1):
    """Return the ancestor of fctx that is present in ancestor manifest am.

    The file contexts yielded by ``fctx.ancestors()`` are examined in
    order. The first one whose path maps to its own file node in am is
    returned. When limit is non-negative, the walk stops after the first
    ancestor whose linkrev() and rev() are both lower than limit.

    Returns None when no matching ancestor is found.
    """
    for f in fctx.ancestors():
        if am.get(f.path(), None) == f.filenode():
            return f
        if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
            return None
    # ran out of ancestors without finding a match
    return None

138

139

def _dirstatecopies(d):
    """Return the dirstate copy records for files in state 'a', 'n' or 'm'.

    d is a context whose ``_repo.dirstate`` is consulted. A copy of the
    mapping returned by ``dirstate.copies()`` is filtered, so the dirstate's
    own mapping is left untouched.
    """
    ds = d._repo.dirstate
    c = ds.copies().copy()
    # iterate over a snapshot of the keys because entries are deleted
    # from c inside the loop
    for k in list(c.keys()):
        if ds[k] not in 'anm':
            del c[k]
    return c

146

147

def _forwardcopies(a, b):
    """Find {dst@b: src@a} copy mapping where a is an ancestor of b.

    When b is the working copy (``b.rev()`` is None) the committed part of
    the mapping is computed against its first parent and then chained with
    the copies recorded in the dirstate.
    """

    # check for working copy
    w = None
    if b.rev() is None:
        w = b
        b = w.p1()
        if a == b:
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(w)

    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        limit = -1
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}
    missing = set(b.manifest().iterkeys())
    missing.difference_update(am.iterkeys())

    # the same ancestry object is attached to every file context traced below
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in missing:
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            cm[f] = ofctx.path()

    # combine copies from dirstate if necessary
    if w is not None:
        cm = _chain(a, w, cm, _dirstatecopies(w))

    return cm

186

187

def _backwardrenames(a, b):
    """Return the renames from a back to b as an inverted forward mapping.

    The forward copies from b to a are computed and inverted, dropping
    every entry whose source is still present in a (a copy rather than a
    rename).
    """
    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    f = _forwardcopies(b, a)
    r = {}
    # sorted so that, for 1:n renames, later keys overwrite earlier ones
    # in a fixed order
    for k, v in sorted(f.items()):
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r

199

200

def pathcopies(x, y):
    """Find {dst@y: src@x} copy mapping for directed compare.

    Returns an empty dict when x and y are equal or when either is falsy.
    Otherwise the ancestor given by ``y.ancestor(x)`` decides the strategy:
    forward copies when it is x, backward renames when it is y, and a chain
    of both through the ancestor in any other case.
    """
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        return _forwardcopies(x, y)
    if a == y:
        return _backwardrenames(x, y)
    return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))

210

211

def _computenonoverlap(repo, m1, m2, ma):
    """Compute the files exclusive to m1 and to m2.

    This is its own function so extensions can easily wrap this call to see
    what files mergecopies is about to process.

    Returns a pair (u1, u2) of sorted lists: names in m1 that are in neither
    m2 nor ma, and names in m2 that are in neither m1 nor ma. Non-empty
    lists are also written to the debug output of ``repo.ui``.
    """
    u1 = _nonoverlap(m1, m2, ma)
    u2 = _nonoverlap(m2, m1, ma)

    if u1:
        repo.ui.debug(" unmatched files in local:\n %s\n"
                      % "\n ".join(u1))
    if u2:
        repo.ui.debug(" unmatched files in other:\n %s\n"
                      % "\n ".join(u2))
    return u1, u2

226

211

def mergecopies(repo, c1, c2, ca):
    """
    Find moves and copies between context c1 and c2 that are relevant
    for merging. ca is the context of their common ancestor.

    Returns four dicts: "copy", "movewithdir", "diverge", and
    "renamedelete".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}
    m1 = c1.manifest()
    m2 = c2.manifest()
    ma = ca.manifest()

    def makectx(f, n):
        if len(n) != 20: # in a working context?
            if c1.rev() is None:
                return c1.filectx(f)
            return c2.filectx(f)
        return repo.filectx(f, fileid=n)

    ctx = util.lrucachefunc(makectx)
    copy = {}
    movewithdir = {}
    fullcopy = {}
    diverge = {}

    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    u1, u2 = _computenonoverlap(repo, m1, m2, ma)

    for f in u1:
        checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)

    for f in u2:
        checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)

    renamedelete = {}
    renamedelete2 = set()
    diverge2 = set()
    # iterate over a snapshot because entries are deleted inside the loop
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedelete2.update(fl) # reverse map for below
        else:
            diverge2.update(fl) # reverse map for below

    bothnew = sorted([d for d in m1 if d in m2 and d not in ma])
    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge, _copy, _fullcopy = {}, {}, {}
    for f in bothnew:
        checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
        checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in diverge2:
                note += "!"
            if f in renamedelete2:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del diverge2

    if not fullcopy:
        return copy, movewithdir, diverge, renamedelete

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    #
    # dirmove and invalid are both keyed by directory name WITH a trailing
    # "/" so that lookups and the final invalidation pass actually match
    # the stored entries
    for dst, src in fullcopy.items():
        dsrc, ddst = _dirname(src), _dirname(dst)
        srckey, dstkey = dsrc + "/", ddst + "/"
        if srckey in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(srckey)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(srckey)
        elif srckey in dirmove and dirmove[srckey] != dstkey:
            # files from the same directory moved to two different places
            invalid.add(srckey)
        else:
            # looks good so far
            dirmove[srckey] = dstkey

    # drop moves recorded before their source directory was invalidated
    for i in invalid:
        dirmove.pop(i, None)
    del d1, d2, invalid

    if not dirmove:
        return copy, movewithdir, diverge, renamedelete

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete

379

387

380

def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
    """
    check possible copies of f from m1 to m2

    ctx = function accepting (filename, node) that returns a filectx.
    f = the filename to check
    m1 = the source manifest
    m2 = the destination manifest
    ca = the changectx of the common ancestor
    limit = the rev number to not search beyond
    diverge = record all diverges in this dict
    copy = record all non-divergent copies in this dict
    fullcopy = record all copies in this dict

    Nothing is returned; results are recorded in diverge, copy and
    fullcopy, which are modified in place.
    """

    ma = ca.manifest()

    def _related(f1, f2, limit):
        # Walk back to common ancestor to see if the two files originate
        # from the same file. Since workingfilectx's rev() is None it messes
        # up the integer comparison logic, hence the pre-step check for
        # None (f1 and f2 can only be workingfilectx's initially).

        if f1 == f2:
            return f1 # a match

        g1, g2 = f1.ancestors(), f2.ancestors()
        try:
            f1r, f2r = f1.rev(), f2.rev()

            if f1r is None:
                f1 = next(g1)
            if f2r is None:
                f2 = next(g2)

            while True:
                f1r, f2r = f1.rev(), f2.rev()
                # step back on whichever side is at the higher rev
                if f1r > f2r:
                    f1 = next(g1)
                elif f2r > f1r:
                    f2 = next(g2)
                elif f1 == f2:
                    return f1 # a match
                elif f1r == f2r or f1r < limit or f2r < limit:
                    return False # copy no longer relevant
        except StopIteration:
            # one side ran out of ancestors before a match was found
            return False

    of = None
    seen = set([f])
    for oc in ctx(f, m1[f]).ancestors():
        ocr = oc.rev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        fullcopy[f] = of # remember for dir rename detection
        if of not in m2:
            continue # no match, keep looking
        if m2[of] == ma.get(of):
            break # no merge needed, quit early
        c2 = ctx(of, m2[of])
        cr = _related(oc, c2, ca.rev())
        if cr and (of == f or of == c2.path()): # non-divergent
            copy[f] = of
            # cleared so the divergence check below does not record f
            of = None
            break

    if of in ma:
        diverge.setdefault(of, []).append(f)

454

462

455

def duplicatecopies(repo, rev, fromrev, skiprev=None):
    """Reproduce copies from fromrev to rev in the dirstate.

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    if skiprev is not None:
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).items():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst in exclude:
            continue
        if repo.dirstate[dst] in "nma":
            repo.dirstate.copy(src, dst)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             import util
             import heapq
             def _nonoverlap(d1, d2, d3):
                 "Return list of elements in d1 not in d2 or d3"
                 return sorted([d for d in d1 if d not in d3 and d not in d2])
             def _dirname(f):
                 s = f.rfind("/")
                 if s == -1:
                     return ""
                 return f[:s]
             def _findlimit(repo, a, b):
                 """
                 Find the last revision that needs to be checked to ensure that a full
                 transitive closure for file copies can be properly calculated.
                 Generally, this means finding the earliest revision number that's an
                 ancestor of a or b but not both, except when a or b is a direct descendant
                 of the other, in which case we can return the minimum revnum of a and b.

                 a and b are revision numbers; None stands for the working directory and
                 is replaced by the pseudo revision len(repo.changelog).

                 Returns None if the walk finds no revision reachable from both a and b.
                 """
                 # basic idea:
                 # - mark a and b with different sides
                 # - if a parent's children are all on the same side, the parent is
                 #   on that side, otherwise it is on no side
                 # - walk the graph in topological order with the help of a heap;
                 #   - add unseen parents to side map
                 #   - clear side of any parent that has children on different sides
                 #   - track number of interesting revs that might still be on a side
                 #   - track the lowest interesting rev seen
                 #   - quit when interesting revs is zero
                 cl = repo.changelog
                 working = len(cl) # pseudo rev for the working directory
                 if a is None:
                     a = working
                 if b is None:
                     b = working
                 side = {a: -1, b: 1}
                 # revs are stored negated so that the min-heap pops the highest
                 # revision number first
                 visit = [-a, -b]
                 heapq.heapify(visit)
                 interesting = len(visit)
                 hascommonancestor = False
                 limit = working
                 while interesting:
                     r = -heapq.heappop(visit)
                     if r == working:
                         # the pseudo rev has no changelog entry; its parents come
                         # from the dirstate instead
                         parents = [cl.rev(p) for p in repo.dirstate.parents()]
                     else:
                         parents = cl.parentrevs(r)
                     for p in parents:
                         if p < 0:
                             # negative parent rev (presumably the null revision):
                             # nothing to visit
                             continue
                         if p not in side:
                             # first time we see p; add it to visit
                             side[p] = side[r]
                             if side[p]:
                                 interesting += 1
                             heapq.heappush(visit, -p)
                         elif side[p] and side[p] != side[r]:
                             # p was interesting but now we know better
                             side[p] = 0
                             interesting -= 1
                             hascommonancestor = True
                     if side[r]:
                         limit = r # lowest rev visited
                         interesting -= 1
                 if not hascommonancestor:
                     return None
                 # Consider the following flow (see test-commit-amend.t under issue4405):
                 # 1/ File 'a0' committed
                 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
                 # 3/ Move back to first commit
                 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
                 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
                 #
                 # During the amend in step five, we will be in this state:
                 #
                 # @  3 temporary amend commit for a1-amend
                 # |
                 # o  2 a1-amend
                 # |
                 # | o  1 a1
                 # |/
                 # o  0 a0
                 #
                 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
                 # yet the filelog has the copy information in rev 1 and we will not look
                 # back far enough unless we also look at the a and b as candidates.
                 # This only occurs when a is a descendant of b or vice versa.
                 return min(limit, a, b)
             def _chain(src, dst, a, b):
                 '''chain two sets of copies a->b'''
                 t = a.copy()
                 for k, v in b.iteritems():
                     if v in t:
                         # found a chain
                         if t[v] != k:
                             # file wasn't renamed back to itself
                             t[k] = t[v]
                         if v not in dst:
                             # chain was a rename, not a copy
                             del t[v]
                     if v in src:
                         # file is a copy of an existing file
                         t[k] = v
                 # remove criss-crossed copies
                 for k, v in t.items():
                     if k in src and v in dst:
                         del t[k]
                 return t
             def _tracefile(fctx, am, limit=-1):
                 '''return file context that is the ancestor of fctx present in ancestor
                 manifest am, stopping after the first ancestor lower than limit'''
                 for f in fctx.ancestors():
                     if am.get(f.path(), None) == f.filenode():
                         return f
                     if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
                         return None
             def _dirstatecopies(d):
                 ds = d._repo.dirstate
                 c = ds.copies().copy()
                 for k in c.keys():
                     if ds[k] not in 'anm':
                         del c[k]
                 return c
             def _forwardcopies(a, b):
                 '''build the {dst@b: src@a} copy mapping, a being an ancestor of b'''
                 # a context without a revision number is the working copy: trace from
                 # its first parent and fold the dirstate copies in at the end
                 wctx = None
                 if b.rev() is None:
                     wctx = b
                     b = wctx.p1()
                     if a == b:
                         # short-circuit to avoid issues with merge states
                         return _dirstatecopies(wctx)
                 # tracing may have to reach the fctx parent of the last changeset
                 # that is on one side only, but never further back than that
                 limit = _findlimit(a._repo, a.rev(), b.rev())
                 if limit is None:
                     limit = -1
                 basemanifest = a.manifest()
                 # only files that are new in b are traced; finding out where old
                 # files went is too expensive, so a sequence such as
                 # 'hg rm b; hg cp a b' can be missed
                 added = set(b.manifest().iterkeys())
                 added.difference_update(a.manifest().iterkeys())
                 ancestry = a._repo.changelog.ancestors([b.rev()], inclusive=True)
                 copies = {}
                 for dst in added:
                     dstctx = b[dst]
                     dstctx._ancestrycontext = ancestry
                     srcctx = _tracefile(dstctx, basemanifest, limit)
                     if srcctx:
                         copies[dst] = srcctx.path()
                 if wctx is None:
                     return copies
                 # working copy: chain the committed copies with the dirstate ones
                 return _chain(a, wctx, copies, _dirstatecopies(wctx))
             def _backwardrenames(a, b):
                 '''invert the forward copies from b to a, keeping only real renames

                 Copies are not taken into account here, yet one source can still have
                 several destinations (e.g. hg cp a b; hg mv a c). Entries are visited
                 in sorted order and a later one overwrites an earlier one, so which
                 rename is kept is arbitrary.'''
                 forward = _forwardcopies(b, a)
                 renames = {}
                 for dst, src in sorted(forward.iteritems()):
                     # a source that still exists in a was copied, not renamed
                     if src not in a:
                         renames[src] = dst
                 return renames
             def pathcopies(x, y):
                 '''compute the {dst@y: src@x} copy mapping for a directed compare'''
                 if x == y or not x or not y:
                     # identical or falsy contexts: no copies to report
                     return {}
                 base = y.ancestor(x)
                 if base == x:
                     # x is an ancestor of y: trace forward only
                     copies = _forwardcopies(x, y)
                 elif base == y:
                     # y is an ancestor of x: read the renames backwards
                     copies = _backwardrenames(x, y)
                 else:
                     # go back from x to the common ancestor, then forward to y
                     copies = _chain(x, y, _backwardrenames(x, base),
                                     _forwardcopies(base, y))
                 return copies
+            def _computenonoverlap(repo, m1, m2, ma):
+                """Computes the files exclusive to m1 and m2.
+                This is its own function so extensions can easily wrap this call to see what
+                files mergecopies is about to process.
+                """
                 # u1: sorted names present in m1 but in neither m2 nor ma
+                u1 = _nonoverlap(m1, m2, ma)
                 # u2: the mirror image - sorted names present only in m2
+                u2 = _nonoverlap(m2, m1, ma)
                 # each list is written to the debug output only when it is non-empty
+                if u1:
+                    repo.ui.debug("  unmatched files in local:\n   %s\n"
+                                  % "\n   ".join(u1))
+                if u2:
+                    repo.ui.debug("  unmatched files in other:\n   %s\n"
+                                  % "\n   ".join(u2))
                 # callers receive both lists as a (u1, u2) pair
+                return u1, u2
             def mergecopies(repo, c1, c2, ca):
                 """
                 Find moves and copies between context c1 and c2 that are relevant
                 for merging.
                 Returns four dicts: "copy", "movewithdir", "diverge", and
                 "renamedelete".
                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.
                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.
                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.
                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return {}, {}, {}, {}
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return repo.dirstate.copies(), {}, {}, {}
                 limit = _findlimit(repo, c1.rev(), c2.rev())
                 if limit is None:
                     # no common ancestor, no copies
                     return {}, {}, {}, {}
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 ma = ca.manifest()
                 # build a filectx for file f at node n; a node that is not 20 bytes long
                 # is served by whichever of c1/c2 has no revision number
                 def makectx(f, n):
                     if len(n) != 20: # in a working context?
                         if c1.rev() is None:
                             return c1.filectx(f)
                         return c2.filectx(f)
                     return repo.filectx(f, fileid=n)
                 # NOTE(review): util.lrucachefunc is defined outside this file;
                 # presumably it caches makectx results - confirm
                 ctx = util.lrucachefunc(makectx)
                 copy = {}
                 movewithdir = {}
                 fullcopy = {}
                 diverge = {}
                 repo.ui.debug("  searching for copies back to rev %d\n" % limit)
-                u1 = _nonoverlap(m1, m2, ma)
+                u1, u2 = _computenonoverlap(repo, m1, m2, ma)
-                u2 = _nonoverlap(m2, m1, ma)
-                if u1:
-                    repo.ui.debug("  unmatched files in local:\n   %s\n"
-                                  % "\n   ".join(u1))
-                if u2:
-                    repo.ui.debug("  unmatched files in other:\n   %s\n"
-                                  % "\n   ".join(u2))
                 # trace the files exclusive to each side; checkcopies fills diverge,
                 # copy and fullcopy in place
                 for f in u1:
                     checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)
                 for f in u2:
                     checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)
                 renamedelete = {}
                 renamedelete2 = set()
                 diverge2 = set()
                 # NOTE(review): entries are deleted from diverge inside this loop, which
                 # is only safe while items() returns a list snapshot, not a live view
                 for of, fl in diverge.items():
                     if len(fl) == 1 or of in c1 or of in c2:
                         del diverge[of] # not actually divergent, or not a rename
                         if of not in c1 and of not in c2:
                             # renamed on one side, deleted on the other side, but filter
                             # out files that have been renamed and then deleted
                             renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                             renamedelete2.update(fl) # reverse map for below
                     else:
                         diverge2.update(fl) # reverse map for below
                 # files present on both sides but absent from the ancestor manifest
                 bothnew = sorted([d for d in m1 if d in m2 and d not in ma])
                 if bothnew:
                     repo.ui.debug("  unmatched files new in both:\n   %s\n"
                                   % "\n   ".join(bothnew))
                 # these are checked in both directions into scratch dicts, so only the
                 # explicit assignment below reaches the returned "copy" mapping
                 bothdiverge, _copy, _fullcopy = {}, {}, {}
                 for f in bothnew:
                     checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
                     checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
                 for of, fl in bothdiverge.items():
                     if len(fl) == 2 and fl[0] == fl[1]:
                         copy[fl[0]] = of # not actually divergent, just matching renames
                 if fullcopy and repo.ui.debugflag:
                     repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                                   "% = renamed and deleted):\n")
                     for f in sorted(fullcopy):
                         note = ""
                         if f in copy:
                             note += "*"
                         if f in diverge2:
                             note += "!"
                         if f in renamedelete2:
                             note += "%"
                         repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                                           note))
                 del diverge2
                 if not fullcopy:
                     return copy, movewithdir, diverge, renamedelete
                 repo.ui.debug("  checking for directory renames\n")
                 # generate a directory move map
                 d1, d2 = c1.dirs(), c2.dirs()
                 d1.addpath('/')
                 d2.addpath('/')
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in fullcopy.iteritems():
                     dsrc, ddst = _dirname(src), _dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif dsrc in d1 and ddst in d1:
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc)
                     elif dsrc in d2 and ddst in d2:
                         # directory wasn't entirely moved remotely
                         invalid.add(dsrc)
                     # NOTE(review): dirmove keys are stored below with a trailing "/"
                     # but this lookup uses dsrc without one, so the branch looks
                     # unable to match a previously recorded move - confirm
                     elif dsrc in dirmove and dirmove[dsrc] != ddst:
                         # files from the same directory moved to two different places
                         invalid.add(dsrc)
                     else:
                         # looks good so far
                         dirmove[dsrc + "/"] = ddst + "/"
                 # NOTE(review): invalid holds names without the trailing "/" used for
                 # dirmove keys, so this cleanup looks unable to remove them - confirm
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del d1, d2, invalid
                 if not dirmove:
                     return copy, movewithdir, diverge, renamedelete
                 for d in dirmove:
                     repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                                   (d, dirmove[d]))
                 # check unaccounted nonoverlapping files against directory moves
                 for f in u1 + u2:
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d):]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(("   pending file src: '%s' -> "
                                                    "dst: '%s'\n") % (f, df))
                                 break
                 return copy, movewithdir, diverge, renamedelete
             def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
                 """
                 check possible copies of f from m1 to m2
                 ctx = function accepting (filename, node) that returns a filectx.
                 f = the filename to check
                 m1 = the source manifest
                 m2 = the destination manifest
                 ca = the changectx of the common ancestor
                 limit = the rev number to not search beyond
                 diverge = record all diverges in this dict
                 copy = record all non-divergent copies in this dict
                 fullcopy = record all copies in this dict

                 Nothing is returned; diverge, copy and fullcopy are updated in place.
                 """
                 ma = ca.manifest()
                 # returns the common file context when f1 and f2 meet while walking
                 # back, False otherwise
                 def _related(f1, f2, limit):
                     # Walk back to common ancestor to see if the two files originate
                     # from the same file. Since workingfilectx's rev() is None it messes
                     # up the integer comparison logic, hence the pre-step check for
                     # None (f1 and f2 can only be workingfilectx's initially).
                     if f1 == f2:
                         return f1 # a match
                     g1, g2 = f1.ancestors(), f2.ancestors()
                     try:
                         f1r, f2r = f1.rev(), f2.rev()
                         if f1r is None:
                             f1 = g1.next()
                         if f2r is None:
                             f2 = g2.next()
                         # step back on whichever side has the higher rev until both
                         # sides are at the same rev
                         while True:
                             f1r, f2r = f1.rev(), f2.rev()
                             if f1r > f2r:
                                 f1 = g1.next()
                             elif f2r > f1r:
                                 f2 = g2.next()
                             elif f1 == f2:
                                 return f1 # a match
                             elif f1r == f2r or f1r < limit or f2r < limit:
                                 return False # copy no longer relevant
                     except StopIteration:
                         # one of the ancestor walks ran out before a match was found
                         return False
                 # of ends up as the last ancestor path examined; it stays None when f
                 # has no ancestors and is reset to None once a copy is recorded
                 of = None
                 seen = set([f])
                 for oc in ctx(f, m1[f]).ancestors():
                     ocr = oc.rev()
                     of = oc.path()
                     if of in seen:
                         # check limit late - grab last rename before
                         if ocr < limit:
                             break
                         continue
                     seen.add(of)
                     fullcopy[f] = of # remember for dir rename detection
                     if of not in m2:
                         continue # no match, keep looking
                     if m2[of] == ma.get(of):
                         break # no merge needed, quit early
                     c2 = ctx(of, m2[of])
                     # note: the ancestor's rev, not the limit argument, bounds this walk
                     cr = _related(oc, c2, ca.rev())
                     if cr and (of == f or of == c2.path()): # non-divergent
                         copy[f] = of
                         # clear of so the divergence bookkeeping below is skipped
                         of = None
                         break
                 # no copy was recorded and the last path examined exists in the ancestor
                 # manifest: remember f as a destination of that path
                 if of in ma:
                     diverge.setdefault(of, []).append(f)
             def duplicatecopies(repo, rev, fromrev, skiprev=None):
                 '''record in the dirstate the copies that happened from fromrev to rev

                 When skiprev is given, destinations that pathcopies reports between
                 fromrev and skiprev are left out, even if they also show up among the
                 copies between fromrev and rev.
                 '''
                 if skiprev is None:
                     exclude = {}
                 else:
                     exclude = pathcopies(repo[fromrev], repo[skiprev])
                 copies = pathcopies(repo[fromrev], repo[rev])
                 for dst, src in copies.iteritems():
                     # pathcopies also returns backward renames, so dst is not
                     # guaranteed to be in the dirstate; only entries in state
                     # 'n', 'm' or 'a' get a copy record
                     if dst not in exclude and repo.dirstate[dst] in "nma":
                         repo.dirstate.copy(src, dst)