upstream/mercurial-mirror Commit - r20990:d9e211a6

1

# copies.py - copy detection for Mercurial

1

# copies.py - copy detection for Mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

import util

8

import util

9

import heapq

9

import heapq

10

11

def _nonoverlap(d1, d2, d3):

11

def _nonoverlap(d1, d2, d3):

12

"Return list of elements in d1 not in d2 or d3"

12

"Return list of elements in d1 not in d2 or d3"

13

return sorted([d for d in d1 if d not in d3 and d not in d2])

13

return sorted([d for d in d1 if d not in d3 and d not in d2])

14

15

def _dirname(f):

15

def _dirname(f):

16

s = f.rfind("/")

16

s = f.rfind("/")

17

if s == -1:

17

if s == -1:

18

return ""

18

return ""

19

return f[:s]

19

return f[:s]

20

21

def _findlimit(repo, a, b):

21

def _findlimit(repo, a, b):

22

"""Find the earliest revision that's an ancestor of a or b but not both,

22

"""Find the earliest revision that's an ancestor of a or b but not both,

23

None if no such revision exists.

23

None if no such revision exists.

24

"""

24

"""

25

# basic idea:

25

# basic idea:

26

# - mark a and b with different sides

26

# - mark a and b with different sides

27

# - if a parent's children are all on the same side, the parent is

27

# - if a parent's children are all on the same side, the parent is

28

# on that side, otherwise it is on no side

28

# on that side, otherwise it is on no side

29

# - walk the graph in topological order with the help of a heap;

29

# - walk the graph in topological order with the help of a heap;

30

# - add unseen parents to side map

30

# - add unseen parents to side map

31

# - clear side of any parent that has children on different sides

31

# - clear side of any parent that has children on different sides

32

# - track number of interesting revs that might still be on a side

32

# - track number of interesting revs that might still be on a side

33

# - track the lowest interesting rev seen

33

# - track the lowest interesting rev seen

34

# - quit when interesting revs is zero

34

# - quit when interesting revs is zero

35

36

cl = repo.changelog

36

cl = repo.changelog

37

working = len(cl) # pseudo rev for the working directory

37

working = len(cl) # pseudo rev for the working directory

38

if a is None:

38

if a is None:

39

a = working

39

a = working

40

if b is None:

40

if b is None:

41

b = working

41

b = working

42

43

side = {a: -1, b: 1}

43

side = {a: -1, b: 1}

44

visit = [-a, -b]

44

visit = [-a, -b]

45

heapq.heapify(visit)

45

heapq.heapify(visit)

46

interesting = len(visit)

46

interesting = len(visit)

47

hascommonancestor = False

47

hascommonancestor = False

48

limit = working

48

limit = working

49

50

while interesting:

50

while interesting:

51

r = -heapq.heappop(visit)

51

r = -heapq.heappop(visit)

52

if r == working:

52

if r == working:

53

parents = [cl.rev(p) for p in repo.dirstate.parents()]

53

parents = [cl.rev(p) for p in repo.dirstate.parents()]

54

else:

54

else:

55

parents = cl.parentrevs(r)

55

parents = cl.parentrevs(r)

56

for p in parents:

56

for p in parents:

57

if p < 0:

57

if p < 0:

58

continue

58

continue

59

if p not in side:

59

if p not in side:

60

# first time we see p; add it to visit

60

# first time we see p; add it to visit

61

side[p] = side[r]

61

side[p] = side[r]

62

if side[p]:

62

if side[p]:

63

interesting += 1

63

interesting += 1

64

heapq.heappush(visit, -p)

64

heapq.heappush(visit, -p)

65

elif side[p] and side[p] != side[r]:

65

elif side[p] and side[p] != side[r]:

66

# p was interesting but now we know better

66

# p was interesting but now we know better

67

side[p] = 0

67

side[p] = 0

68

interesting -= 1

68

interesting -= 1

69

hascommonancestor = True

69

hascommonancestor = True

70

if side[r]:

70

if side[r]:

71

limit = r # lowest rev visited

71

limit = r # lowest rev visited

72

interesting -= 1

72

interesting -= 1

73

74

if not hascommonancestor:

74

if not hascommonancestor:

75

return None

75

return None

76

return limit

76

return limit

77

78

def _chain(src, dst, a, b):

78

def _chain(src, dst, a, b):

79

'''chain two sets of copies a->b'''

79

'''chain two sets of copies a->b'''

80

t = a.copy()

80

t = a.copy()

81

for k, v in b.iteritems():

81

for k, v in b.iteritems():

82

if v in t:

82

if v in t:

83

# found a chain

83

# found a chain

84

if t[v] != k:

84

if t[v] != k:

85

# file wasn't renamed back to itself

85

# file wasn't renamed back to itself

86

t[k] = t[v]

86

t[k] = t[v]

87

if v not in dst:

87

if v not in dst:

88

# chain was a rename, not a copy

88

# chain was a rename, not a copy

89

del t[v]

89

del t[v]

90

if v in src:

90

if v in src:

91

# file is a copy of an existing file

91

# file is a copy of an existing file

92

t[k] = v

92

t[k] = v

93

94

# remove criss-crossed copies

94

# remove criss-crossed copies

95

for k, v in t.items():

95

for k, v in t.items():

96

if k in src and v in dst:

96

if k in src and v in dst:

97

del t[k]

97

del t[k]

98

99

return t

99

return t

100

101

def _tracefile(fctx, am, limit=-1):

101

def _tracefile(fctx, am, limit=-1):

102

'''return file context that is the ancestor of fctx present in ancestor

102

'''return file context that is the ancestor of fctx present in ancestor

103

manifest am, stopping after the first ancestor lower than limit'''

103

manifest am, stopping after the first ancestor lower than limit'''

104

105

for f in fctx.ancestors():

105

for f in fctx.ancestors():

106

if am.get(f.path(), None) == f.filenode():

106

if am.get(f.path(), None) == f.filenode():

107

return f

107

return f

108

if f.rev() < limit:

108

if f.rev() < limit:

109

return None

109

return None

110

111

def _dirstatecopies(d):

111

def _dirstatecopies(d):

112

ds = d._repo.dirstate

112

ds = d._repo.dirstate

113

c = ds.copies().copy()

113

c = ds.copies().copy()

114

for k in c.keys():

114

for k in c.keys():

115

if ds[k] not in 'anm':

115

if ds[k] not in 'anm':

116

del c[k]

116

del c[k]

117

return c

117

return c

118

119

def _forwardcopies(a, b):

119

def _forwardcopies(a, b):

120

'''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

120

'''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

121

122

# check for working copy

122

# check for working copy

123

w = None

123

w = None

124

if b.rev() is None:

124

if b.rev() is None:

125

w = b

125

w = b

126

b = w.p1()

126

b = w.p1()

127

if a == b:

127

if a == b:

128

# short-circuit to avoid issues with merge states

128

# short-circuit to avoid issues with merge states

129

return _dirstatecopies(w)

129

return _dirstatecopies(w)

130

131

# files might have to be traced back to the fctx parent of the last

131

# files might have to be traced back to the fctx parent of the last

132

# one-side-only changeset, but not further back than that

132

# one-side-only changeset, but not further back than that

133

limit = _findlimit(a._repo, a.rev(), b.rev())

133

limit = _findlimit(a._repo, a.rev(), b.rev())

134

if limit is None:

134

if limit is None:

135

limit = -1

135

limit = -1

136

am = a.manifest()

136

am = a.manifest()

137

138

# find where new files came from

138

# find where new files came from

139

# we currently don't try to find where old files went, too expensive

139

# we currently don't try to find where old files went, too expensive

140

# this means we can miss a case like 'hg rm b; hg cp a b'

140

# this means we can miss a case like 'hg rm b; hg cp a b'

141

cm = {}

141

cm = {}

142

missing = set(b.manifest().iterkeys())

142

missing = set(b.manifest().iterkeys())

143

missing.difference_update(a.manifest().iterkeys())

143

missing.difference_update(a.manifest().iterkeys())

144

145

for f in missing:

145

for f in missing:

146

ofctx = _tracefile(b[f], am, limit)

146

ofctx = _tracefile(b[f], am, limit)

147

if ofctx:

147

if ofctx:

148

cm[f] = ofctx.path()

148

cm[f] = ofctx.path()

149

150

# combine copies from dirstate if necessary

150

# combine copies from dirstate if necessary

151

if w is not None:

151

if w is not None:

152

cm = _chain(a, w, cm, _dirstatecopies(w))

152

cm = _chain(a, w, cm, _dirstatecopies(w))

153

154

return cm

154

return cm

155

156

def _backwardrenames(a, b):

156

def _backwardrenames(a, b):

157

# Even though we're not taking copies into account, 1:n rename situations

157

# Even though we're not taking copies into account, 1:n rename situations

158

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

158

# can still exist (e.g. hg cp a b; hg mv a c). In those cases we

159

# arbitrarily pick one of the renames.

159

# arbitrarily pick one of the renames.

160

f = _forwardcopies(b, a)

160

f = _forwardcopies(b, a)

161

r = {}

161

r = {}

162

for k, v in sorted(f.iteritems()):

162

for k, v in sorted(f.iteritems()):

163

# remove copies

163

# remove copies

164

if v in a:

164

if v in a:

165

continue

165

continue

166

r[v] = k

166

r[v] = k

167

return r

167

return r

168

169

def pathcopies(x, y):

169

def pathcopies(x, y):

170

'''find {dst@y: src@x} copy mapping for directed compare'''

170

'''find {dst@y: src@x} copy mapping for directed compare'''

171

if x == y or not x or not y:

171

if x == y or not x or not y:

172

return {}

172

return {}

173

a = y.ancestor(x)

173

a = y.ancestor(x)

174

if a == x:

174

if a == x:

175

return _forwardcopies(x, y)

175

return _forwardcopies(x, y)

176

if a == y:

176

if a == y:

177

return _backwardrenames(x, y)

177

return _backwardrenames(x, y)

178

return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))

178

return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))

179

180

def mergecopies(repo, c1, c2, ca):

180

def mergecopies(repo, c1, c2, ca):

181

"""

181

"""

182

Find moves and copies between context c1 and c2 that are relevant

182

Find moves and copies between context c1 and c2 that are relevant

183

for merging.

183

for merging.

184

185

Returns four dicts: "copy", "movewithdir", "diverge", and

185

Returns four dicts: "copy", "movewithdir", "diverge", and

186

"renamedelete".

186

"renamedelete".

187

188

"copy" is a mapping from destination name -> source name,

188

"copy" is a mapping from destination name -> source name,

189

where source is in c1 and destination is in c2 or vice-versa.

189

where source is in c1 and destination is in c2 or vice-versa.

190

191

"movewithdir" is a mapping from source name -> destination name,

191

"movewithdir" is a mapping from source name -> destination name,

192

where the file at source present in one context but not the other

192

where the file at source present in one context but not the other

193

needs to be moved to destination by the merge process, because the

193

needs to be moved to destination by the merge process, because the

194

other context moved the directory it is in.

194

other context moved the directory it is in.

195

196

"diverge" is a mapping of source name -> list of destination names

196

"diverge" is a mapping of source name -> list of destination names

197

for divergent renames.

197

for divergent renames.

198

199

"renamedelete" is a mapping of source name -> list of destination

199

"renamedelete" is a mapping of source name -> list of destination

200

names for files deleted in c1 that were renamed in c2 or vice-versa.

200

names for files deleted in c1 that were renamed in c2 or vice-versa.

201

"""

201

"""

202

# avoid silly behavior for update from empty dir

202

# avoid silly behavior for update from empty dir

203

if not c1 or not c2 or c1 == c2:

203

if not c1 or not c2 or c1 == c2:

204

return {}, {}, {}, {}

204

return {}, {}, {}, {}

205

206

# avoid silly behavior for parent -> working dir

206

# avoid silly behavior for parent -> working dir

207

if c2.node() is None and c1.node() == repo.dirstate.p1():

207

if c2.node() is None and c1.node() == repo.dirstate.p1():

208

return repo.dirstate.copies(), {}, {}, {}

208

return repo.dirstate.copies(), {}, {}, {}

209

210

limit = _findlimit(repo, c1.rev(), c2.rev())

210

limit = _findlimit(repo, c1.rev(), c2.rev())

211

if limit is None:

211

if limit is None:

212

# no common ancestor, no copies

212

# no common ancestor, no copies

213

return {}, {}, {}, {}

213

return {}, {}, {}, {}

214

m1 = c1.manifest()

214

m1 = c1.manifest()

215

m2 = c2.manifest()

215

m2 = c2.manifest()

216

ma = ca.manifest()

216

ma = ca.manifest()

217

218

def makectx(f, n):

218

def makectx(f, n):

219

if len(n) != 20: # in a working context?

219

if len(n) != 20: # in a working context?

220

if c1.rev() is None:

220

if c1.rev() is None:

221

return c1.filectx(f)

221

return c1.filectx(f)

222

return c2.filectx(f)

222

return c2.filectx(f)

223

return repo.filectx(f, fileid=n)

223

return repo.filectx(f, fileid=n)

224

225

ctx = util.lrucachefunc(makectx)

225

ctx = util.lrucachefunc(makectx)

226

copy = {}

226

copy = {}

227

movewithdir = {}

227

movewithdir = {}

228

fullcopy = {}

228

fullcopy = {}

229

diverge = {}

229

diverge = {}

230

231

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

231

repo.ui.debug(" searching for copies back to rev %d\n" % limit)

232

233

u1 = _nonoverlap(m1, m2, ma)

233

u1 = _nonoverlap(m1, m2, ma)

234

u2 = _nonoverlap(m2, m1, ma)

234

u2 = _nonoverlap(m2, m1, ma)

235

236

if u1:

236

if u1:

237

repo.ui.debug(" unmatched files in local:\n %s\n"

237

repo.ui.debug(" unmatched files in local:\n %s\n"

238

% "\n ".join(u1))

238

% "\n ".join(u1))

239

if u2:

239

if u2:

240

repo.ui.debug(" unmatched files in other:\n %s\n"

240

repo.ui.debug(" unmatched files in other:\n %s\n"

241

% "\n ".join(u2))

241

% "\n ".join(u2))

242

243

for f in u1:

243

for f in u1:

244

checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)

244

checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)

245

246

for f in u2:

246

for f in u2:

247

checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)

247

checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)

248

249

renamedelete = {}

249

renamedelete = {}

250

renamedelete2 = set()

250

renamedelete2 = set()

251

diverge2 = set()

251

diverge2 = set()

252

for of, fl in diverge.items():

252

for of, fl in diverge.items():

253

if len(fl) == 1 or of in c1 or of in c2:

253

if len(fl) == 1 or of in c1 or of in c2:

254

del diverge[of] # not actually divergent, or not a rename

254

del diverge[of] # not actually divergent, or not a rename

255

if of not in c1 and of not in c2:

255

if of not in c1 and of not in c2:

256

# renamed on one side, deleted on the other side, but filter

256

# renamed on one side, deleted on the other side, but filter

257

# out files that have been renamed and then deleted

257

# out files that have been renamed and then deleted

258

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

258

renamedelete[of] = [f for f in fl if f in c1 or f in c2]

259

renamedelete2.update(fl) # reverse map for below

259

renamedelete2.update(fl) # reverse map for below

260

else:

260

else:

261

diverge2.update(fl) # reverse map for below

261

diverge2.update(fl) # reverse map for below

262

263

bothnew = sorted([d for d in m1 if d in m2 and d not in ma])

263

bothnew = sorted([d for d in m1 if d in m2 and d not in ma])

264

if bothnew:

264

if bothnew:

265

repo.ui.debug(" unmatched files new in both:\n %s\n"

265

repo.ui.debug(" unmatched files new in both:\n %s\n"

266

% "\n ".join(bothnew))

266

% "\n ".join(bothnew))

267

bothdiverge, _copy, _fullcopy = {}, {}, {}

267

bothdiverge, _copy, _fullcopy = {}, {}, {}

268

for f in bothnew:

268

for f in bothnew:

269

checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)

269

checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)

270

checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)

270

checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)

271

for of, fl in bothdiverge.items():

271

for of, fl in bothdiverge.items():

272

if len(fl) == 2 and fl[0] == fl[1]:

272

if len(fl) == 2 and fl[0] == fl[1]:

273

copy[fl[0]] = of # not actually divergent, just matching renames

273

copy[fl[0]] = of # not actually divergent, just matching renames

274

275

if fullcopy:

275

if fullcopy and repo.ui.debugflag:

276

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

276

repo.ui.debug(" all copies found (* = to merge, ! = divergent, "

277

"% = renamed and deleted):\n")

277

"% = renamed and deleted):\n")

278

for f in sorted(fullcopy):

278

for f in sorted(fullcopy):

279

note = ""

279

note = ""

280

if f in copy:

280

if f in copy:

281

note += "*"

281

note += "*"

282

if f in diverge2:

282

if f in diverge2:

283

note += "!"

283

note += "!"

284

if f in renamedelete2:

284

if f in renamedelete2:

285

note += "%"

285

note += "%"

286

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

286

repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,

287

note))

287

note))

288

del diverge2

288

del diverge2

289

290

if not fullcopy:

290

if not fullcopy:

291

return copy, movewithdir, diverge, renamedelete

291

return copy, movewithdir, diverge, renamedelete

292

293

repo.ui.debug(" checking for directory renames\n")

293

repo.ui.debug(" checking for directory renames\n")

294

295

# generate a directory move map

295

# generate a directory move map

296

d1, d2 = c1.dirs(), c2.dirs()

296

d1, d2 = c1.dirs(), c2.dirs()

297

d1.addpath('/')

297

d1.addpath('/')

298

d2.addpath('/')

298

d2.addpath('/')

299

invalid = set()

299

invalid = set()

300

dirmove = {}

300

dirmove = {}

301

302

# examine each file copy for a potential directory move, which is

302

# examine each file copy for a potential directory move, which is

303

# when all the files in a directory are moved to a new directory

303

# when all the files in a directory are moved to a new directory

304

for dst, src in fullcopy.iteritems():

304

for dst, src in fullcopy.iteritems():

305

dsrc, ddst = _dirname(src), _dirname(dst)

305

dsrc, ddst = _dirname(src), _dirname(dst)

306

if dsrc in invalid:

306

if dsrc in invalid:

307

# already seen to be uninteresting

307

# already seen to be uninteresting

308

continue

308

continue

309

elif dsrc in d1 and ddst in d1:

309

elif dsrc in d1 and ddst in d1:

310

# directory wasn't entirely moved locally

310

# directory wasn't entirely moved locally

311

invalid.add(dsrc)

311

invalid.add(dsrc)

312

elif dsrc in d2 and ddst in d2:

312

elif dsrc in d2 and ddst in d2:

313

# directory wasn't entirely moved remotely

313

# directory wasn't entirely moved remotely

314

invalid.add(dsrc)

314

invalid.add(dsrc)

315

elif dsrc in dirmove and dirmove[dsrc] != ddst:

315

elif dsrc in dirmove and dirmove[dsrc] != ddst:

316

# files from the same directory moved to two different places

316

# files from the same directory moved to two different places

317

invalid.add(dsrc)

317

invalid.add(dsrc)

318

else:

318

else:

319

# looks good so far

319

# looks good so far

320

dirmove[dsrc + "/"] = ddst + "/"

320

dirmove[dsrc + "/"] = ddst + "/"

321

322

for i in invalid:

322

for i in invalid:

323

if i in dirmove:

323

if i in dirmove:

324

del dirmove[i]

324

del dirmove[i]

325

del d1, d2, invalid

325

del d1, d2, invalid

326

327

if not dirmove:

327

if not dirmove:

328

return copy, movewithdir, diverge, renamedelete

328

return copy, movewithdir, diverge, renamedelete

329

330

for d in dirmove:

330

for d in dirmove:

331

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

331

repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %

332

(d, dirmove[d]))

332

(d, dirmove[d]))

333

334

# check unaccounted nonoverlapping files against directory moves

334

# check unaccounted nonoverlapping files against directory moves

335

for f in u1 + u2:

335

for f in u1 + u2:

336

if f not in fullcopy:

336

if f not in fullcopy:

337

for d in dirmove:

337

for d in dirmove:

338

if f.startswith(d):

338

if f.startswith(d):

339

# new file added in a directory that was moved, move it

339

# new file added in a directory that was moved, move it

340

df = dirmove[d] + f[len(d):]

340

df = dirmove[d] + f[len(d):]

341

if df not in copy:

341

if df not in copy:

342

movewithdir[f] = df

342

movewithdir[f] = df

343

repo.ui.debug((" pending file src: '%s' -> "

343

repo.ui.debug((" pending file src: '%s' -> "

344

"dst: '%s'\n") % (f, df))

344

"dst: '%s'\n") % (f, df))

345

break

345

break

346

347

return copy, movewithdir, diverge, renamedelete

347

return copy, movewithdir, diverge, renamedelete

348

349

def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):

349

def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):

350

"""

350

"""

351

check possible copies of f from m1 to m2

351

check possible copies of f from m1 to m2

352

353

ctx = function accepting (filename, node) that returns a filectx.

353

ctx = function accepting (filename, node) that returns a filectx.

354

f = the filename to check

354

f = the filename to check

355

m1 = the source manifest

355

m1 = the source manifest

356

m2 = the destination manifest

356

m2 = the destination manifest

357

ca = the changectx of the common ancestor

357

ca = the changectx of the common ancestor

358

limit = the rev number to not search beyond

358

limit = the rev number to not search beyond

359

diverge = record all diverges in this dict

359

diverge = record all diverges in this dict

360

copy = record all non-divergent copies in this dict

360

copy = record all non-divergent copies in this dict

361

fullcopy = record all copies in this dict

361

fullcopy = record all copies in this dict

362

"""

362

"""

363

364

ma = ca.manifest()

364

ma = ca.manifest()

365

366

def _related(f1, f2, limit):

366

def _related(f1, f2, limit):

367

# Walk back to common ancestor to see if the two files originate

367

# Walk back to common ancestor to see if the two files originate

368

# from the same file. Since workingfilectx's rev() is None it messes

368

# from the same file. Since workingfilectx's rev() is None it messes

369

# up the integer comparison logic, hence the pre-step check for

369

# up the integer comparison logic, hence the pre-step check for

370

# None (f1 and f2 can only be workingfilectx's initially).

370

# None (f1 and f2 can only be workingfilectx's initially).

371

372

if f1 == f2:

372

if f1 == f2:

373

return f1 # a match

373

return f1 # a match

374

375

g1, g2 = f1.ancestors(), f2.ancestors()

375

g1, g2 = f1.ancestors(), f2.ancestors()

376

try:

376

try:

377

f1r, f2r = f1.rev(), f2.rev()

377

f1r, f2r = f1.rev(), f2.rev()

378

379

if f1r is None:

379

if f1r is None:

380

f1 = g1.next()

380

f1 = g1.next()

381

if f2r is None:

381

if f2r is None:

382

f2 = g2.next()

382

f2 = g2.next()

383

384

while True:

384

while True:

385

f1r, f2r = f1.rev(), f2.rev()

385

f1r, f2r = f1.rev(), f2.rev()

386

if f1r > f2r:

386

if f1r > f2r:

387

f1 = g1.next()

387

f1 = g1.next()

388

elif f2r > f1r:

388

elif f2r > f1r:

389

f2 = g2.next()

389

f2 = g2.next()

390

elif f1 == f2:

390

elif f1 == f2:

391

return f1 # a match

391

return f1 # a match

392

elif f1r == f2r or f1r < limit or f2r < limit:

392

elif f1r == f2r or f1r < limit or f2r < limit:

393

return False # copy no longer relevant

393

return False # copy no longer relevant

394

except StopIteration:

394

except StopIteration:

395

return False

395

return False

396

397

of = None

397

of = None

398

seen = set([f])

398

seen = set([f])

399

for oc in ctx(f, m1[f]).ancestors():

399

for oc in ctx(f, m1[f]).ancestors():

400

ocr = oc.rev()

400

ocr = oc.rev()

401

of = oc.path()

401

of = oc.path()

402

if of in seen:

402

if of in seen:

403

# check limit late - grab last rename before

403

# check limit late - grab last rename before

404

if ocr < limit:

404

if ocr < limit:

405

break

405

break

406

continue

406

continue

407

seen.add(of)

407

seen.add(of)

408

409

fullcopy[f] = of # remember for dir rename detection

409

fullcopy[f] = of # remember for dir rename detection

410

if of not in m2:

410

if of not in m2:

411

continue # no match, keep looking

411

continue # no match, keep looking

412

if m2[of] == ma.get(of):

412

if m2[of] == ma.get(of):

413

break # no merge needed, quit early

413

break # no merge needed, quit early

414

c2 = ctx(of, m2[of])

414

c2 = ctx(of, m2[of])

415

cr = _related(oc, c2, ca.rev())

415

cr = _related(oc, c2, ca.rev())

416

if cr and (of == f or of == c2.path()): # non-divergent

416

if cr and (of == f or of == c2.path()): # non-divergent

417

copy[f] = of

417

copy[f] = of

418

of = None

418

of = None

419

break

419

break

420

421

if of in ma:

421

if of in ma:

422

diverge.setdefault(of, []).append(f)

422

diverge.setdefault(of, []).append(f)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # copies.py - copy detection for Mercurial
             #
             # Copyright 2008 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             import util
             import heapq
             def _nonoverlap(d1, d2, d3):
                 "Return list of elements in d1 not in d2 or d3"
                 return sorted([d for d in d1 if d not in d3 and d not in d2])
             def _dirname(f):
                 s = f.rfind("/")
                 if s == -1:
                     return ""
                 return f[:s]
             def _findlimit(repo, a, b):
                 """Find the earliest revision that's an ancestor of a or b but not both,
                 None if no such revision exists.
                 """
                 # basic idea:
                 # - mark a and b with different sides
                 # - if a parent's children are all on the same side, the parent is
                 #   on that side, otherwise it is on no side
                 # - walk the graph in topological order with the help of a heap;
                 #   - add unseen parents to side map
                 #   - clear side of any parent that has children on different sides
                 #   - track number of interesting revs that might still be on a side
                 #   - track the lowest interesting rev seen
                 #   - quit when interesting revs is zero
                 cl = repo.changelog
                 working = len(cl) # pseudo rev for the working directory
                 if a is None:
                     a = working
                 if b is None:
                     b = working
                 side = {a: -1, b: 1}
                 visit = [-a, -b]
                 heapq.heapify(visit)
                 interesting = len(visit)
                 hascommonancestor = False
                 limit = working
                 while interesting:
                     r = -heapq.heappop(visit)
                     if r == working:
                         parents = [cl.rev(p) for p in repo.dirstate.parents()]
                     else:
                         parents = cl.parentrevs(r)
                     for p in parents:
                         if p < 0:
                             continue
                         if p not in side:
                             # first time we see p; add it to visit
                             side[p] = side[r]
                             if side[p]:
                                 interesting += 1
                             heapq.heappush(visit, -p)
                         elif side[p] and side[p] != side[r]:
                             # p was interesting but now we know better
                             side[p] = 0
                             interesting -= 1
                             hascommonancestor = True
                     if side[r]:
                         limit = r # lowest rev visited
                         interesting -= 1
                 if not hascommonancestor:
                     return None
                 return limit
             def _chain(src, dst, a, b):
                 '''chain two sets of copies a->b'''
                 t = a.copy()
                 for k, v in b.iteritems():
                     if v in t:
                         # found a chain
                         if t[v] != k:
                             # file wasn't renamed back to itself
                             t[k] = t[v]
                         if v not in dst:
                             # chain was a rename, not a copy
                             del t[v]
                     if v in src:
                         # file is a copy of an existing file
                         t[k] = v
                 # remove criss-crossed copies
                 for k, v in t.items():
                     if k in src and v in dst:
                         del t[k]
                 return t
             def _tracefile(fctx, am, limit=-1):
                 '''return file context that is the ancestor of fctx present in ancestor
                 manifest am, stopping after the first ancestor lower than limit'''
                 for f in fctx.ancestors():
                     if am.get(f.path(), None) == f.filenode():
                         return f
                     if f.rev() < limit:
                         return None
             def _dirstatecopies(d):
                 ds = d._repo.dirstate
                 c = ds.copies().copy()
                 for k in c.keys():
                     if ds[k] not in 'anm':
                         del c[k]
                 return c
             def _forwardcopies(a, b):
                 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
                 # a context whose rev() is None is the working copy: do the
                 # history walk from its first parent and fold the dirstate copies
                 # in at the end
                 wctx = None
                 if b.rev() is None:
                     wctx = b
                     b = wctx.p1()
                     if a == b:
                         # short-circuit to avoid issues with merge states
                         return _dirstatecopies(wctx)
                 # files might have to be traced back to the fctx parent of the last
                 # one-side-only changeset, but not further back than that
                 limit = _findlimit(a._repo, a.rev(), b.rev())
                 if limit is None:
                     limit = -1
                 am = a.manifest()
                 # Only files present in b but absent from a are traced back.
                 # We currently don't try to find where old files went (too
                 # expensive), so a case like 'hg rm b; hg cp a b' can be missed.
                 added = set(b.manifest().iterkeys())
                 added.difference_update(a.manifest().iterkeys())
                 copies = {}
                 for dstname in added:
                     srcfctx = _tracefile(b[dstname], am, limit)
                     if srcfctx:
                         copies[dstname] = srcfctx.path()
                 if wctx is None:
                     return copies
                 # combine with the copies recorded in the dirstate
                 return _chain(a, wctx, copies, _dirstatecopies(wctx))
             def _backwardrenames(a, b):
                 '''invert the forward copies from b to a, keeping only renames

                 Returns a {src: dst} mapping built from the {dst: src} forward mapping,
                 leaving out every entry whose source is still present in a (those are
                 copies, not renames).'''
                 # Even though we're not taking copies into account, 1:n rename situations
                 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
                 # arbitrarily pick one of the renames: entries are visited in sorted
                 # order and a later one overwrites an earlier one for the same source.
                 forward = _forwardcopies(b, a)
                 renames = {}
                 for dst, src in sorted(forward.iteritems()):
                     if src not in a:
                         renames[src] = dst
                 return renames
             def pathcopies(x, y):
                 '''find {dst@y: src@x} copy mapping for directed compare

                 Returns an empty dict when x and y are equal or either one is falsy.
                 Otherwise the result depends on y.ancestor(x): forward copies when it
                 is x, backward renames when it is y, and the chaining of both through
                 that ancestor in every other case.'''
                 if x == y or not x or not y:
                     return {}
                 anc = y.ancestor(x)
                 if anc == x:
                     return _forwardcopies(x, y)
                 if anc == y:
                     return _backwardrenames(x, y)
                 # unrelated directions: go back from x to the ancestor, then forward to y
                 back = _backwardrenames(x, anc)
                 forward = _forwardcopies(anc, y)
                 return _chain(x, y, back, forward)
             def mergecopies(repo, c1, c2, ca):
                 """
                 Find moves and copies between context c1 and c2 that are relevant
                 for merging. ca is the context of their common ancestor; its manifest
                 is used as the base both sides are compared against.

                 Returns four dicts: "copy", "movewithdir", "diverge", and
                 "renamedelete".

                 "copy" is a mapping from destination name -> source name,
                 where source is in c1 and destination is in c2 or vice-versa.

                 "movewithdir" is a mapping from source name -> destination name,
                 where the file at source present in one context but not the other
                 needs to be moved to destination by the merge process, because the
                 other context moved the directory it is in.

                 "diverge" is a mapping of source name -> list of destination names
                 for divergent renames.

                 "renamedelete" is a mapping of source name -> list of destination
                 names for files deleted in c1 that were renamed in c2 or vice-versa.
                 """
                 # avoid silly behavior for update from empty dir
                 if not c1 or not c2 or c1 == c2:
                     return {}, {}, {}, {}
                 # avoid silly behavior for parent -> working dir
                 if c2.node() is None and c1.node() == repo.dirstate.p1():
                     return repo.dirstate.copies(), {}, {}, {}
                 limit = _findlimit(repo, c1.rev(), c2.rev())
                 if limit is None:
                     # no common ancestor, no copies
                     return {}, {}, {}, {}
                 m1 = c1.manifest()
                 m2 = c2.manifest()
                 ma = ca.manifest()
                 def makectx(f, n):
                     if len(n) != 20: # in a working context?
                         if c1.rev() is None:
                             return c1.filectx(f)
                         return c2.filectx(f)
                     return repo.filectx(f, fileid=n)
                 ctx = util.lrucachefunc(makectx)
                 copy = {}
                 movewithdir = {}
                 fullcopy = {}
                 diverge = {}
                 repo.ui.debug("  searching for copies back to rev %d\n" % limit)
                 # files present on exactly one side and absent from the ancestor
                 u1 = _nonoverlap(m1, m2, ma)
                 u2 = _nonoverlap(m2, m1, ma)
                 if u1:
                     repo.ui.debug("  unmatched files in local:\n   %s\n"
                                   % "\n   ".join(u1))
                 if u2:
                     repo.ui.debug("  unmatched files in other:\n   %s\n"
                                   % "\n   ".join(u2))
                 for f in u1:
                     checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)
                 for f in u2:
                     checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)
                 renamedelete = {}
                 renamedelete2 = set()
                 diverge2 = set()
                 for of, fl in diverge.items():
                     if len(fl) == 1 or of in c1 or of in c2:
                         del diverge[of] # not actually divergent, or not a rename
                         if of not in c1 and of not in c2:
                             # renamed on one side, deleted on the other side, but filter
                             # out files that have been renamed and then deleted
                             renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                             renamedelete2.update(fl) # reverse map for below
                     else:
                         diverge2.update(fl) # reverse map for below
                 # files added on both sides under the same name
                 bothnew = sorted([d for d in m1 if d in m2 and d not in ma])
                 if bothnew:
                     repo.ui.debug("  unmatched files new in both:\n   %s\n"
                                   % "\n   ".join(bothnew))
                 # scratch dicts: only bothdiverge is inspected afterwards
                 bothdiverge, _copy, _fullcopy = {}, {}, {}
                 for f in bothnew:
                     checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
                     checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
                 for of, fl in bothdiverge.items():
                     if len(fl) == 2 and fl[0] == fl[1]:
                         copy[fl[0]] = of # not actually divergent, just matching renames
                 # only build the (sorted) report when debug output is enabled
                 if fullcopy and repo.ui.debugflag:
                     repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                                   "% = renamed and deleted):\n")
                     for f in sorted(fullcopy):
                         note = ""
                         if f in copy:
                             note += "*"
                         if f in diverge2:
                             note += "!"
                         if f in renamedelete2:
                             note += "%"
                         repo.ui.debug("   src: '%s' -> dst: '%s' %s\n"
                                       % (fullcopy[f], f, note))
                 del diverge2
                 if not fullcopy:
                     return copy, movewithdir, diverge, renamedelete
                 repo.ui.debug("  checking for directory renames\n")
                 # generate a directory move map
                 d1, d2 = c1.dirs(), c2.dirs()
                 d1.addpath('/')
                 d2.addpath('/')
                 invalid = set()
                 dirmove = {}
                 # examine each file copy for a potential directory move, which is
                 # when all the files in a directory are moved to a new directory
                 #
                 # NOTE: dirmove is keyed by "dir/" (with the trailing slash) while
                 # invalid holds bare directory names, so every dirmove lookup below
                 # has to append the slash to find the entry.
                 for dst, src in fullcopy.iteritems():
                     dsrc, ddst = _dirname(src), _dirname(dst)
                     if dsrc in invalid:
                         # already seen to be uninteresting
                         continue
                     elif dsrc in d1 and ddst in d1:
                         # directory wasn't entirely moved locally
                         invalid.add(dsrc)
                     elif dsrc in d2 and ddst in d2:
                         # directory wasn't entirely moved remotely
                         invalid.add(dsrc)
                     elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
                         # files from the same directory moved to two different places
                         invalid.add(dsrc)
                     else:
                         # looks good so far
                         dirmove[dsrc + "/"] = ddst + "/"
                 # drop moves recorded before their source directory was invalidated
                 for i in invalid:
                     if i + "/" in dirmove:
                         del dirmove[i + "/"]
                 del d1, d2, invalid
                 if not dirmove:
                     return copy, movewithdir, diverge, renamedelete
                 for d in dirmove:
                     repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                                   (d, dirmove[d]))
                 # check unaccounted nonoverlapping files against directory moves
                 for f in u1 + u2:
                     if f not in fullcopy:
                         for d in dirmove:
                             if f.startswith(d):
                                 # new file added in a directory that was moved, move it
                                 df = dirmove[d] + f[len(d):]
                                 if df not in copy:
                                     movewithdir[f] = df
                                     repo.ui.debug(("   pending file src: '%s' -> "
                                                    "dst: '%s'\n") % (f, df))
                                 break
                 return copy, movewithdir, diverge, renamedelete
             def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
                 """
                 check possible copies of f from m1 to m2

                 Nothing is returned; the findings are recorded in the diverge, copy
                 and fullcopy dicts passed in by the caller.

                 ctx = function accepting (filename, node) that returns a filectx.
                 f = the filename to check
                 m1 = the source manifest
                 m2 = the destination manifest
                 ca = the changectx of the common ancestor
                 limit = the rev number to not search beyond
                 diverge = record all diverges in this dict
                           (source name -> list of destination names)
                 copy = record all non-divergent copies in this dict
                        (destination name -> source name)
                 fullcopy = record all copies in this dict
                            (destination name -> source name)
                 """
                 # manifest of the common ancestor
                 ma = ca.manifest()
                 def _related(f1, f2, limit):
                     # Walk back to common ancestor to see if the two files originate
                     # from the same file. Since workingfilectx's rev() is None it messes
                     # up the integer comparison logic, hence the pre-step check for
                     # None (f1 and f2 can only be workingfilectx's initially).
                     #
                     # Returns the shared file context on a match, False otherwise.
                     if f1 == f2:
                         return f1 # a match
                     g1, g2 = f1.ancestors(), f2.ancestors()
                     try:
                         f1r, f2r = f1.rev(), f2.rev()
                         if f1r is None:
                             f1 = g1.next()
                         if f2r is None:
                             f2 = g2.next()
                         while True:
                             f1r, f2r = f1.rev(), f2.rev()
                             # step back whichever side sits at the higher revision
                             if f1r > f2r:
                                 f1 = g1.next()
                             elif f2r > f1r:
                                 f2 = g2.next()
                             elif f1 == f2:
                                 return f1 # a match
                             elif f1r == f2r or f1r < limit or f2r < limit:
                                 return False # copy no longer relevant
                     except StopIteration:
                         # one side ran out of ancestors before a match was found
                         return False
                 of = None
                 # paths already visited while walking back through f's history
                 seen = set([f])
                 for oc in ctx(f, m1[f]).ancestors():
                     ocr = oc.rev()
                     of = oc.path()
                     if of in seen:
                         # check limit late - grab last rename before
                         if ocr < limit:
                             break
                         continue
                     seen.add(of)
                     fullcopy[f] = of # remember for dir rename detection
                     if of not in m2:
                         continue # no match, keep looking
                     if m2[of] == ma.get(of):
                         break # no merge needed, quit early
                     c2 = ctx(of, m2[of])
                     cr = _related(oc, c2, ca.rev())
                     if cr and (of == f or of == c2.path()): # non-divergent
                         copy[f] = of
                         # reset so the check after the loop does not also record
                         # this copy as a divergence (assumes None is never a key of
                         # the ancestor manifest)
                         of = None
                         break
                 # the last source name examined exists in the ancestor: remember f as
                 # one of its destinations
                 if of in ma:
                     diverge.setdefault(of, []).append(f)