upstream/mercurial-mirror Commit - r19178:4327687c

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

import util

8

import util

9

import heapq

9

import heapq

10

11

def _nonoverlap(d1, d2, d3):
    """Return a sorted list of the elements of d1 found in neither d2 nor d3.

    d1 is iterated; d2 and d3 only need to support membership tests.
    """
    return sorted([d for d in d1 if d not in d3 and d not in d2])

14

15

def _dirname(f):
    """Return the part of path f before its last "/".

    Returns the empty string when f contains no "/" at all.
    """
    s = f.rfind("/")
    if s == -1:
        return ""
    return f[:s]

20

21

def _findlimit(repo, a, b):
    """Find the earliest revision that's an ancestor of a or b but not both,
    None if no such revision exists.

    repo supplies the changelog (and the dirstate parents when the working
    directory is involved); a and b are revision numbers, where None stands
    for the working directory.
    """
    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    working = len(cl)  # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    side = {a: -1, b: 1}
    # revisions are stored negated so the min-heap pops the highest rev first
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                # negative parent revs are skipped
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r  # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None
    return limit

77

78

def _chain(src, dst, a, b):
    '''chain two sets of copies a->b

    a and b are {destination: source} dicts; src and dst only need to
    support membership tests. Returns a new dict; a and b are not modified.
    '''
    t = a.copy()
    for k, v in b.items():
        if v in t:
            # found a chain
            if t[v] != k:
                # file wasn't renamed back to itself
                t[k] = t[v]
            if v not in dst:
                # chain was a rename, not a copy
                del t[v]
        if v in src:
            # file is a copy of an existing file
            t[k] = v

    # remove criss-crossed copies; iterate over a snapshot because entries
    # are deleted from t inside the loop
    for k, v in list(t.items()):
        if k in src and v in dst:
            del t[k]

    return t

100

101

def _tracefile(fctx, actx):
    '''return file context that is the ancestor of fctx present in actx

    Walks fctx.ancestors() and returns the first one whose path maps to the
    same file node in actx's manifest. Returns None once an ancestor with a
    revision lower than actx.rev() is reached, or when the ancestors run out.
    '''
    stop = actx.rev()
    am = actx.manifest()

    for f in fctx.ancestors():
        if am.get(f.path(), None) == f.filenode():
            return f
        if f.rev() < stop:
            return None

111

112

def _dirstatecopies(d):
    """Return a filtered copy of the dirstate's copy records for context d.

    Only entries whose dirstate state is found in 'anm' are kept. The
    dirstate's own copy map is not modified.
    """
    ds = d._repo.dirstate
    c = ds.copies().copy()
    # iterate over a snapshot of the keys because entries are deleted below
    for k in list(c):
        if ds[k] not in 'anm':
            del c[k]
    return c

119

120

def _forwardcopies(a, b):
    '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

    # check for working copy
    w = None
    if b.rev() is None:
        w = b
        b = w.p1()
        if a == b:
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(w)

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}
    # files present in b's manifest but absent from a's
    missing = set(b.manifest().iterkeys())
    missing.difference_update(a.manifest().iterkeys())

    for f in missing:
        ofctx = _tracefile(b[f], a)
        if ofctx:
            cm[f] = ofctx.path()

    # combine copies from dirstate if necessary
    if w is not None:
        cm = _chain(a, w, cm, _dirstatecopies(w))

    return cm

149

150

def _backwardrenames(a, b):
    """Return {src@b: dst@a} for renames found by _forwardcopies(b, a).

    Entries whose source is still present in a are treated as copies and
    dropped.
    """
    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    f = _forwardcopies(b, a)
    r = {}
    # sorted so that, on a 1:n rename, the entry that wins is deterministic
    for k, v in sorted(f.items()):
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r

162

163

def pathcopies(x, y):
    '''find {dst@y: src@x} copy mapping for directed compare

    Returns an empty dict when x equals y or either of them is false.
    '''
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: plain forward walk
        return _forwardcopies(x, y)
    if a == y:
        # y is an ancestor of x: walk backwards, renames only
        return _backwardrenames(x, y)
    # otherwise go back from x to the ancestor, then forward to y
    return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))

173

174

def mergecopies(repo, c1, c2, ca):
    """
    Find moves and copies between context c1 and c2 that are relevant
    for merging.

    Returns four dicts: "copy", "movewithdir", "diverge", and
    "renamedelete".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}
    m1 = c1.manifest()
    m2 = c2.manifest()
    ma = ca.manifest()

    def makectx(f, n):
        if len(n) != 20:  # in a working context?
            if c1.rev() is None:
                return c1.filectx(f)
            return c2.filectx(f)
        return repo.filectx(f, fileid=n)

    ctx = util.lrucachefunc(makectx)
    copy = {}
    movewithdir = {}
    fullcopy = {}
    diverge = {}

    def _checkcopies(f, m1, m2):
        # thin wrapper binding the shared state of this merge
        checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)

    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    u1 = _nonoverlap(m1, m2, ma)
    u2 = _nonoverlap(m2, m1, ma)

    if u1:
        repo.ui.debug(" unmatched files in local:\n %s\n"
                      % "\n ".join(u1))
    if u2:
        repo.ui.debug(" unmatched files in other:\n %s\n"
                      % "\n ".join(u2))

    for f in u1:
        _checkcopies(f, m1, m2)
    for f in u2:
        _checkcopies(f, m2, m1)

    renamedelete = {}
    renamedelete2 = set()
    diverge2 = set()
    # iterate over a snapshot because entries are deleted from diverge below
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of]  # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedelete2.update(fl)  # reverse map for below
        else:
            diverge2.update(fl)  # reverse map for below

    if fullcopy:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in diverge2:
                note += "!"
            if f in renamedelete2:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                           note))
    del diverge2

    if not fullcopy:
        return copy, movewithdir, diverge, renamedelete

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    #
    # dirmove is keyed (and valued) with a trailing "/" while invalid holds
    # bare directory names, so every dirmove lookup appends the slash
    for dst, src in fullcopy.items():
        dsrc, ddst = _dirname(src), _dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc + "/"] = ddst + "/"

    for i in invalid:
        # drop moves recorded before the directory was found to be invalid
        dirmove.pop(i + "/", None)
    del d1, d2, invalid

    if not dirmove:
        return copy, movewithdir, diverge, renamedelete

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete

332

333

def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
    """
    check possible copies of f from m1 to m2

    ctx = function accepting (filename, node) that returns a filectx.
    f = the filename to check
    m1 = the source manifest
    m2 = the destination manifest
    ca = the changectx of the common ancestor
    limit = the rev number to not search beyond
    diverge = record all diverges in this dict
    copy = record all non-divergent copies in this dict
    fullcopy = record all copies in this dict

    Nothing is returned; results are written into diverge, copy and
    fullcopy.
    """

    ma = ca.manifest()

    def _related(f1, f2, limit):
        # Walk back to common ancestor to see if the two files originate
        # from the same file. Since workingfilectx's rev() is None it messes
        # up the integer comparison logic, hence the pre-step check for
        # None (f1 and f2 can only be workingfilectx's initially).

        if f1 == f2:
            return f1  # a match

        g1, g2 = f1.ancestors(), f2.ancestors()
        try:
            f1r, f2r = f1.rev(), f2.rev()

            if f1r is None:
                f1 = g1.next()
            if f2r is None:
                f2 = g2.next()

            while True:
                f1r, f2r = f1.rev(), f2.rev()
                # step back whichever side has the higher revision
                if f1r > f2r:
                    f1 = g1.next()
                elif f2r > f1r:
                    f2 = g2.next()
                elif f1 == f2:
                    return f1  # a match
                elif f1r == f2r or f1r < limit or f2r < limit:
                    return False  # copy no longer relevant
        except StopIteration:
            # one side ran out of ancestors without a match
            return False

    of = None
    seen = set([f])
    for oc in ctx(f, m1[f]).ancestors():
        ocr = oc.rev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        fullcopy[f] = of  # remember for dir rename detection
        if of not in m2:
            continue  # no match, keep looking
        if m2[of] == ma.get(of):
            break  # no merge needed, quit early
        c2 = ctx(of, m2[of])
        cr = _related(oc, c2, ca.rev())
        if cr and (of == f or of == c2.path()):  # non-divergent
            copy[f] = of
            of = None
            break

    # of is the last path examined, or None after a non-divergent match
    if of in ma:
        diverge.setdefault(of, []).append(f)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             import util
             import heapq
             def _nonoverlap(d1, d2, d3):
                 "Return list of elements in d1 not in d2 or d3"
                 return sorted([d for d in d1 if d not in d3 and d not in d2])
             def _dirname(f):
                 s = f.rfind("/")
                 if s == -1:
                     return ""
                 return f[:s]
             def _findlimit(repo, a, b):
                 """Find the earliest revision that's an ancestor of a or b but not both,
                 None if no such revision exists.

                 repo supplies repo.changelog (for parent lookups) and, for the
                 working directory, repo.dirstate.parents().  a and b are revision
                 numbers; None stands for the working directory and is replaced by
                 the pseudo revision len(repo.changelog).

                 None is returned when the walk never finds a parent whose children
                 disagree about its side, i.e. no common ancestor was detected.
                 """
                 # basic idea:
                 # - mark a and b with different sides
                 # - if a parent's children are all on the same side, the parent is
                 #   on that side, otherwise it is on no side
                 # - walk the graph in topological order with the help of a heap;
                 #   - add unseen parents to side map
                 #   - clear side of any parent that has children on different sides
                 #   - track number of interesting revs that might still be on a side
                 #   - track the lowest interesting rev seen
                 #   - quit when interesting revs is zero
                 cl = repo.changelog
                 working = len(cl) # pseudo rev for the working directory
                 if a is None:
                     a = working
                 if b is None:
                     b = working
                 # side values: -1 = reached from a only, 1 = reached from b only,
                 # 0 = reached from both (no longer interesting)
                 side = {a: -1, b: 1}
                 # revs are stored negated so that heapq (a min-heap) pops the
                 # highest revision first
                 visit = [-a, -b]
                 heapq.heapify(visit)
                 interesting = len(visit)
                 hascommonancestor = False
                 limit = working
                 while interesting:
                     r = -heapq.heappop(visit)
                     if r == working:
                         # the pseudo rev is not in the changelog; its parents come
                         # from the dirstate
                         parents = [cl.rev(p) for p in repo.dirstate.parents()]
                     else:
                         parents = cl.parentrevs(r)
                     for p in parents:
                         if p < 0:
                             # negative parent rev: presumably the null revision
                             # marking a missing parent -- TODO confirm
                             continue
                         if p not in side:
                             # first time we see p; add it to visit
                             side[p] = side[r]
                             # p only counts as interesting when it inherited a
                             # non-zero side
                             if side[p]:
                                 interesting += 1
                             heapq.heappush(visit, -p)
                         elif side[p] and side[p] != side[r]:
                             # p was interesting but now we know better
                             side[p] = 0
                             interesting -= 1
                             hascommonancestor = True
                     if side[r]:
                         # r is still on exactly one side; revs are popped in
                         # decreasing order, so it is the lowest such rev so far
                         limit = r # lowest rev visited
                         interesting -= 1
                 if not hascommonancestor:
                     return None
                 return limit
             def _chain(src, dst, a, b):
                 '''chain two sets of copies a->b'''
                 t = a.copy()
                 for k, v in b.iteritems():
                     if v in t:
                         # found a chain
                         if t[v] != k:
                             # file wasn't renamed back to itself
                             t[k] = t[v]
                         if v not in dst:
                             # chain was a rename, not a copy
                             del t[v]
                     if v in src:
                         # file is a copy of an existing file
                         t[k] = v
                 # remove criss-crossed copies
                 for k, v in t.items():
                     if k in src and v in dst:
                         del t[k]
                 return t
             def _tracefile(fctx, actx):
                 '''return file context that is the ancestor of fctx present in actx'''
                 stop = actx.rev()
                 am = actx.manifest()
                 for f in fctx.ancestors():
                     if am.get(f.path(), None) == f.filenode():
                         return f
                     if f.rev() < stop:
                         return None
             def _dirstatecopies(d):
                 ds = d._repo.dirstate
                 c = ds.copies().copy()
                 for k in c.keys():
                     if ds[k] not in 'anm':
                         del c[k]
                 return c
             def _forwardcopies(a, b):
                 '''Build the {dst@b: src@a} copy mapping; a must be an ancestor of b.

                 When b is a working context (b.rev() is None) the committed part
                 is computed against its first parent and then chained with the
                 copies recorded in the dirstate.
                 '''
                 # check for working copy
                 workingctx = None
                 if b.rev() is None:
                     workingctx = b
                     b = workingctx.p1()
                     if a == b:
                         # short-circuit to avoid issues with merge states
                         return _dirstatecopies(workingctx)

                 # Find where new files came from: only names present in b but
                 # absent from a are traced.  We currently don't try to find where
                 # old files went (too expensive), which means a case like
                 # 'hg rm b; hg cp a b' can be missed.
                 added = set(b.manifest().iterkeys())
                 added.difference_update(a.manifest().iterkeys())
                 copies = {}
                 for name in added:
                     origin = _tracefile(b[name], a)
                     if not origin:
                         continue
                     copies[name] = origin.path()

                 if workingctx is None:
                     return copies
                 # combine with the copies recorded in the dirstate
                 return _chain(a, workingctx, copies, _dirstatecopies(workingctx))
             def _backwardrenames(a, b):
                 '''Invert the forward copies from b to a, keeping only renames.

                 Even though copies are not taken into account, 1:n rename
                 situations can still exist (e.g. hg cp a b; hg mv a c).  In those
                 cases one of the renames is picked arbitrarily: destinations are
                 visited in sorted order and a later one overwrites an earlier one.
                 '''
                 forward = _forwardcopies(b, a)
                 renames = {}
                 for dst in sorted(forward):
                     src = forward[dst]
                     # a source still present in a was copied, not renamed
                     if src not in a:
                         renames[src] = dst
                 return renames
             def pathcopies(x, y):
                 '''Build the {dst@y: src@x} copy mapping for a directed compare.

                 Returns an empty dict when x equals y or either one is false.
                 Otherwise the result depends on how y.ancestor(x) relates to x
                 and y: forward copies, backward renames, or both chained through
                 the ancestor.
                 '''
                 if x == y:
                     return {}
                 if not (x and y):
                     return {}

                 base = y.ancestor(x)
                 if base == x:
                     # x is an ancestor of y
                     return _forwardcopies(x, y)
                 elif base == y:
                     # y is an ancestor of x
                     return _backwardrenames(x, y)
                 else:
                     back = _backwardrenames(x, base)
                     ahead = _forwardcopies(base, y)
                     return _chain(x, y, back, ahead)
             def mergecopies(repo, c1, c2, ca):
                 """
                 Find moves and copies between context c1 and c2 that are relevant
                 for merging.
                 Returns four dicts: "copy", "movewithdir", "diverge", and
                 "renamedelete".
                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.
                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.
                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.
                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return {}, {}, {}, {}
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return repo.dirstate.copies(), {}, {}, {}
                 limit = _findlimit(repo, c1.rev(), c2.rev())
                 if limit is None:
                     # no common ancestor, no copies
                     return {}, {}, {}, {}
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 ma = ca.manifest()
                 def makectx(f, n):
                     if len(n) != 20: # in a working context?
                         if c1.rev() is None:
                             return c1.filectx(f)
                         return c2.filectx(f)
                     return repo.filectx(f, fileid=n)
                 ctx = util.lrucachefunc(makectx)
                 copy = {}
                 movewithdir = {}
                 fullcopy = {}
                 diverge = {}
-                def related(f1, f2, limit):
+                def _checkcopies(f, m1, m2):
-                    # Walk back to common ancestor to see if the two files originate
+                    checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)
-                    # from the same file. Since workingfilectx's rev() is None it messes
-                    # up the integer comparison logic, hence the pre-step check for
-                    # None (f1 and f2 can only be workingfilectx's initially).
-                    if f1 == f2:
-                        return f1 # a match
-                    g1, g2 = f1.ancestors(), f2.ancestors()
-                    try:
-                        f1r, f2r = f1.rev(), f2.rev()
-                        if f1r is None:
-                            f1 = g1.next()
-                        if f2r is None:
-                            f2 = g2.next()
-                        while True:
-                            f1r, f2r = f1.rev(), f2.rev()
-                            if f1r > f2r:
-                                f1 = g1.next()
-                            elif f2r > f1r:
-                                f2 = g2.next()
-                            elif f1 == f2:
-                                return f1 # a match
-                            elif f1r == f2r or f1r < limit or f2r < limit:
-                                return False # copy no longer relevant
-                    except StopIteration:
-                        return False
-                def checkcopies(f, m1, m2):
-                    '''check possible copies of f from m1 to m2'''
-                    of = None
-                    seen = set([f])
-                    for oc in ctx(f, m1[f]).ancestors():
-                        ocr = oc.rev()
-                        of = oc.path()
-                        if of in seen:
-                            # check limit late - grab last rename before
-                            if ocr < limit:
-                                break
-                            continue
-                        seen.add(of)
-                        fullcopy[f] = of # remember for dir rename detection
-                        if of not in m2:
-                            continue # no match, keep looking
-                        if m2[of] == ma.get(of):
-                            break # no merge needed, quit early
-                        c2 = ctx(of, m2[of])
-                        cr = related(oc, c2, ca.rev())
-                        if cr and (of == f or of == c2.path()): # non-divergent
-                            copy[f] = of
-                            of = None
-                            break
-                    if of in ma:
-                        diverge.setdefault(of, []).append(f)
                 repo.ui.debug("  searching for copies back to rev %d\n" % limit)
                 u1 = _nonoverlap(m1, m2, ma)
                 u2 = _nonoverlap(m2, m1, ma)
                 if u1:
                     repo.ui.debug("  unmatched files in local:\n   %s\n"
                                   % "\n   ".join(u1))
                 if u2:
                     repo.ui.debug("  unmatched files in other:\n   %s\n"
                                   % "\n   ".join(u2))
                 for f in u1:
-                    checkcopies(f, m1, m2)
+                    _checkcopies(f, m1, m2)
                 for f in u2:
-                    checkcopies(f, m2, m1)
+                    _checkcopies(f, m2, m1)
                 renamedelete = {}
                 renamedelete2 = set()
                 diverge2 = set()
                 for of, fl in diverge.items():
                     if len(fl) == 1 or of in c1 or of in c2:
                         del diverge[of] # not actually divergent, or not a rename
                         if of not in c1 and of not in c2:
                             # renamed on one side, deleted on the other side, but filter
                             # out files that have been renamed and then deleted
                             renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                             renamedelete2.update(fl) # reverse map for below
                     else:
                         diverge2.update(fl) # reverse map for below
                 if fullcopy:
                     repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                                   "% = renamed and deleted):\n")
                     for f in sorted(fullcopy):
                         note = ""
                         if f in copy:
                             note += "*"
                         if f in diverge2:
                             note += "!"
                         if f in renamedelete2:
                             note += "%"
                         repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                                           note))
                 del diverge2
                 if not fullcopy:
                     return copy, movewithdir, diverge, renamedelete
                 repo.ui.debug("  checking for directory renames\n")
                 # generate a directory move map
                 d1, d2 = c1.dirs(), c2.dirs()
                 d1.addpath('/')
                 d2.addpath('/')
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 for dst, src in fullcopy.iteritems():
                     dsrc, ddst = _dirname(src), _dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif dsrc in d1 and ddst in d1:
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc)
                     elif dsrc in d2 and ddst in d2:
                         # directory wasn't entirely moved remotely
                         invalid.add(dsrc)
                     elif dsrc in dirmove and dirmove[dsrc] != ddst:
                         # files from the same directory moved to two different places
                         invalid.add(dsrc)
                     else:
                         # looks good so far
                         dirmove[dsrc + "/"] = ddst + "/"
                 for i in invalid:
                     if i in dirmove:
                         del dirmove[i]
                 del d1, d2, invalid
                 if not dirmove:
                     return copy, movewithdir, diverge, renamedelete
                 for d in dirmove:
                     repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                                   (d, dirmove[d]))
                 # check unaccounted nonoverlapping files against directory moves
                 for f in u1 + u2:
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d):]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(("   pending file src: '%s' -> "
                                                    "dst: '%s'\n") % (f, df))
                                 break
                 return copy, movewithdir, diverge, renamedelete
+            def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
+                """
+                check possible copies of f from m1 to m2
+                ctx = function accepting (filename, node) that returns a filectx.
+                f = the filename to check
+                m1 = the source manifest
+                m2 = the destination manifest
+                ca = the changectx of the common ancestor
+                limit = the rev number to not search beyond
+                diverge = record all diverges in this dict
+                copy = record all non-divergent copies in this dict
+                fullcopy = record all copies in this dict
+                """
+                ma = ca.manifest()
+                def _related(f1, f2, limit):
+                    # Walk back to common ancestor to see if the two files originate
+                    # from the same file. Since workingfilectx's rev() is None it messes
+                    # up the integer comparison logic, hence the pre-step check for
+                    # None (f1 and f2 can only be workingfilectx's initially).
+                    if f1 == f2:
+                        return f1 # a match
+                    g1, g2 = f1.ancestors(), f2.ancestors()
+                    try:
+                        f1r, f2r = f1.rev(), f2.rev()
+                        if f1r is None:
+                            f1 = g1.next()
+                        if f2r is None:
+                            f2 = g2.next()
+                        while True:
+                            f1r, f2r = f1.rev(), f2.rev()
+                            if f1r > f2r:
+                                f1 = g1.next()
+                            elif f2r > f1r:
+                                f2 = g2.next()
+                            elif f1 == f2:
+                                return f1 # a match
+                            elif f1r == f2r or f1r < limit or f2r < limit:
+                                return False # copy no longer relevant
+                    except StopIteration:
+                        return False
+                of = None
+                seen = set([f])
+                for oc in ctx(f, m1[f]).ancestors():
+                    ocr = oc.rev()
+                    of = oc.path()
+                    if of in seen:
+                        # check limit late - grab last rename before
+                        if ocr < limit:
+                            break
+                        continue
+                    seen.add(of)
+                    fullcopy[f] = of # remember for dir rename detection
+                    if of not in m2:
+                        continue # no match, keep looking
+                    if m2[of] == ma.get(of):
+                        break # no merge needed, quit early
+                    c2 = ctx(of, m2[of])
+                    cr = _related(oc, c2, ca.rev())
+                    if cr and (of == f or of == c2.path()): # non-divergent
+                        copy[f] = of
+                        of = None
+                        break
+                if of in ma:
+                    diverge.setdefault(of, []).append(f)