upstream/mercurial-mirror Commit - r43773:acc4047c

1

# linelog - efficient cache for annotate data
#

#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

7

"""linelog is an efficient cache for annotate data inspired by SCCS Weaves.

SCCS Weaves are an implementation of
https://en.wikipedia.org/wiki/Interleaved_deltas. See
mercurial/help/internals/linelog.txt for an exploration of SCCS weaves
and how linelog works in detail.

Here's a hacker's summary: a linelog is a program which is executed in
the context of a revision. Executing the program emits information
about lines, including the revision that introduced them and the line
number in the file at the introducing revision. When an insertion or
deletion is performed on the file, a jump instruction is used to patch
in a new body of annotate information.
"""

21

from __future__ import absolute_import, print_function

21

from __future__ import absolute_import, print_function

22

23

import abc

23

import abc

24

import struct

24

import struct

25

26

from .thirdparty import attr

26

from .thirdparty import attr

27

from . import pycompat

27

from . import pycompat

28

29

# Binary layout of a single linelog instruction: two big-endian unsigned
# 32-bit integers (op1, op2), i.e. 8 bytes per instruction.
_llentry = struct.Struct(b'>II')

30

31

32

class LineLogError(Exception):
    """Error raised when something bad happens internally in linelog."""

34

35

36

@attr.s
class lineinfo(object):
    """Information about one line emitted by a linelog program."""

    # Introducing revision of this line.
    rev = attr.ib()
    # Line number for this line in its introducing revision.
    linenum = attr.ib()
    # Private. Offset in the linelog program of this line. Used internally.
    _offset = attr.ib()

44

45

46

@attr.s
class annotateresult(object):
    """Result of annotating one revision; iterating yields its lines."""

    # Revision that was annotated.
    rev = attr.ib()
    # List of the line records emitted while executing the program.
    lines = attr.ib()
    # Private. Program offset recorded for the end of the annotated file.
    _eof = attr.ib()

    def __iter__(self):
        return iter(self.lines)

54

55

56

class _llinstruction(object):  # pytype: disable=ignored-metaclass
    """Interface shared by all linelog instructions.

    Every instruction is built from two integer operands and knows how to
    print, compare, encode and execute itself.
    """

    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def __init__(self, op1, op2):
        pass

    @abc.abstractmethod
    def __str__(self):
        pass

    def __repr__(self):
        # The textual form of an instruction doubles as its repr.
        return str(self)

    @abc.abstractmethod
    def __eq__(self, other):
        pass

    @abc.abstractmethod
    def encode(self):
        """Encode this instruction to the binary linelog format."""

    @abc.abstractmethod
    def execute(self, rev, pc, emit):
        """Execute this instruction.

        Args:
          rev: The revision we're annotating.
          pc: The current offset in the linelog program.
          emit: A function that accepts a single lineinfo object.

        Returns:
          The new value of pc. Returns None if execution should stop
          (that is, we've found the end of the file.)
        """

92

93

94

class _jge(_llinstruction):
    """If the current rev is greater than or equal to op1, jump to op2."""

    def __init__(self, op1, op2):
        # Revision the annotated revision is compared against.
        self._cmprev = op1
        # Program offset to continue at when the comparison succeeds.
        self._target = op2

    def __str__(self):
        return r'JGE %d %d' % (self._cmprev, self._target)

    def __eq__(self, other):
        return (
            type(self) == type(other)
            and self._cmprev == other._cmprev
            and self._target == other._target
        )

    def encode(self):
        # The two low bits of the first word carry the opcode (0 here);
        # the comparison revision is stored in the bits above them.
        return _llentry.pack(self._cmprev << 2, self._target)

    def execute(self, rev, pc, emit):
        if rev >= self._cmprev:
            return self._target
        # Comparison failed: fall through to the next instruction.
        return pc + 1

118

119

120

class _jump(_llinstruction):
    """Unconditional jumps are expressed as a JGE with op1 set to 0."""

    def __init__(self, op1, op2):
        if op1 != 0:
            raise LineLogError(b"malformed JUMP, op1 must be 0, got %d" % op1)
        # Program offset execution always continues at.
        self._target = op2

    def __str__(self):
        return r'JUMP %d' % (self._target)

    def __eq__(self, other):
        return type(self) == type(other) and self._target == other._target

    def encode(self):
        # Same encoding as a JGE whose comparison revision is 0.
        return _llentry.pack(0, self._target)

    def execute(self, rev, pc, emit):
        return self._target

139

140

141

class _eof(_llinstruction):
    """EOF is expressed as a JGE that always jumps to 0."""

    def __init__(self, op1, op2):
        # Both operands are fixed; anything else is not an EOF.
        if op1 != 0:
            raise LineLogError(b"malformed EOF, op1 must be 0, got %d" % op1)
        if op2 != 0:
            raise LineLogError(b"malformed EOF, op2 must be 0, got %d" % op2)

    def __str__(self):
        return r'EOF'

    def __eq__(self, other):
        # EOF carries no state, so any two EOF instructions are equal.
        return type(self) == type(other)

    def encode(self):
        return _llentry.pack(0, 0)

    def execute(self, rev, pc, emit):
        # None tells the interpreter loop to stop.
        return None

161

162

163

class _jl(_llinstruction):
    """If the current rev is less than op1, jump to op2."""

    def __init__(self, op1, op2):
        # Revision the annotated revision is compared against.
        self._cmprev = op1
        # Program offset to continue at when the comparison succeeds.
        self._target = op2

    def __str__(self):
        return r'JL %d %d' % (self._cmprev, self._target)

    def __eq__(self, other):
        return (
            type(self) == type(other)
            and self._cmprev == other._cmprev
            and self._target == other._target
        )

    def encode(self):
        # Opcode 1 in the two low bits, comparison revision above them.
        return _llentry.pack(1 | (self._cmprev << 2), self._target)

    def execute(self, rev, pc, emit):
        if rev < self._cmprev:
            return self._target
        # Comparison failed: fall through to the next instruction.
        return pc + 1

187

188

189

class _line(_llinstruction):
    """Emit a line."""

    def __init__(self, op1, op2):
        # This line was introduced by this revision number.
        self._rev = op1
        # This line had the specified line number in the introducing revision.
        self._origlineno = op2

    def __str__(self):
        return r'LINE %d %d' % (self._rev, self._origlineno)

    def __eq__(self, other):
        return (
            type(self) == type(other)
            and self._rev == other._rev
            and self._origlineno == other._origlineno
        )

    def encode(self):
        # Opcode 2 in the two low bits, introducing revision above them.
        return _llentry.pack(2 | (self._rev << 2), self._origlineno)

    def execute(self, rev, pc, emit):
        # Report the line together with its own program offset, then
        # continue with the following instruction.
        emit(lineinfo(self._rev, self._origlineno, pc))
        return pc + 1

214

215

216

def _decodeone(data, offset):
    """Decode a single linelog instruction from an offset in a buffer.

    Args:
      data: Buffer holding encoded instructions.
      offset: Byte offset in data at which the instruction starts.

    Returns:
      The decoded instruction object.

    Raises:
      LineLogError: if the two operand words cannot be read at offset.
      NotImplementedError: if the opcode is not one of 0, 1 or 2.
    """
    try:
        op1, op2 = _llentry.unpack_from(data, offset)
    except struct.error as e:
        raise LineLogError(b'reading an instruction failed: %r' % e)
    # The two low bits of the first word select the opcode; the rest of
    # that word is the first operand.
    opcode = op1 & 0b11
    op1 = op1 >> 2
    if opcode == 0:
        # Opcode 0 covers JGE and its two special cases: a zero first
        # operand is a JUMP, and a zero target on top of that is EOF.
        if op1 == 0:
            if op2 == 0:
                return _eof(op1, op2)
            return _jump(op1, op2)
        return _jge(op1, op2)
    elif opcode == 1:
        return _jl(op1, op2)
    elif opcode == 2:
        return _line(op1, op2)
    raise NotImplementedError(b'Unimplemented opcode %r' % opcode)

235

236

237

class linelog(object):
    """Efficient cache for per-line history information."""

    def __init__(self, program=None, maxrev=0):
        if program is None:
            # We pad the program with an extra leading EOF so that our
            # offsets will match the C code exactly. This means we can
            # interoperate with the C code.
            program = [_eof(0, 0), _eof(0, 0)]
        self._program = program
        # Result of the most recent annotate() call, if any.
        self._lastannotate = None
        self._maxrev = maxrev

    def __eq__(self, other):
        return (
            type(self) == type(other)
            and self._program == other._program
            and self._maxrev == other._maxrev
        )

    def __repr__(self):
        # Native string: __repr__ must not return bytes, and hex() yields
        # a native string that cannot be interpolated into bytes.
        return r'<linelog at %s: maxrev=%d size=%d>' % (
            hex(id(self)),
            self._maxrev,
            len(self._program),
        )

    def debugstr(self):
        """Return a listing of the program, one "offset instruction" per line.

        The padding instruction at offset 0 is not listed.
        """
        # Right-align offsets to the width of the largest one.
        fmt = r'%%%dd %%s' % len(str(len(self._program)))
        return pycompat.sysstr(b'\n').join(
            fmt % (idx, i) for idx, i in enumerate(self._program[1:], 1)
        )

    @classmethod
    def fromdata(cls, buf):
        """Build a linelog from its binary encoding.

        The first entry of buf is a header shaped like a jump instruction:
        its first operand is the maximum revision and its target is the
        number of entries in the buffer.

        Raises:
          LineLogError: if the buffer size, header, or entry count is invalid.
        """
        if len(buf) % _llentry.size != 0:
            raise LineLogError(
                b"invalid linelog buffer size %d (must be a multiple of %d)"
                % (len(buf), _llentry.size)
            )
        # Floor division keeps the entry count an integer on Python 3.
        expected = len(buf) // _llentry.size
        fakejge = _decodeone(buf, 0)
        if isinstance(fakejge, _jump):
            # A zero maxrev makes the header decode as a plain JUMP.
            maxrev = 0
        elif isinstance(fakejge, (_jge, _jl)):
            maxrev = fakejge._cmprev
        else:
            # EOF and LINE have no comparison revision or target.
            raise LineLogError(
                b"corrupt linelog data: invalid header instruction %r"
                % fakejge
            )
        numentries = fakejge._target
        if expected != numentries:
            raise LineLogError(
                b"corrupt linelog data: claimed"
                b" %d entries but given data for %d entries"
                % (numentries, expected)
            )
        # Offset 0 holds the header on disk; in memory it is a padding EOF.
        instructions = [_eof(0, 0)]
        for offset in pycompat.xrange(1, numentries):
            instructions.append(_decodeone(buf, offset * _llentry.size))
        return cls(instructions, maxrev=maxrev)

    def encode(self):
        """Return the binary encoding of this linelog."""
        # The header replaces the padding instruction at offset 0.
        hdr = _jge(self._maxrev, len(self._program)).encode()
        return hdr + b''.join(i.encode() for i in self._program[1:])

    def clear(self):
        """Reset to the state of a freshly constructed, empty linelog."""
        # Restore the same padded two-EOF program __init__ builds; an
        # empty list would leave annotate() with no instruction at pc 1.
        self._program = [_eof(0, 0), _eof(0, 0)]
        self._maxrev = 0
        self._lastannotate = None

    def replacelines_vec(self, rev, a1, a2, blines):
        """Replace lines [a1, a2) with the (rev, linenum) pairs in blines."""
        return self.replacelines(
            rev, a1, a2, 0, len(blines), _internal_blines=blines
        )

    def replacelines(self, rev, a1, a2, b1, b2, _internal_blines=None):
        """Replace lines [a1, a2) with lines [b1, b2).

        When _internal_blines is given, the inserted line b is recorded as
        the (rev, linenum) pair _internal_blines[b] instead of (rev, b).

        Raises:
          LineLogError: if a1 or a2 is past the end of the annotated lines.
        """
        if self._lastannotate:
            # TODO(augie): make replacelines() accept a revision at
            # which we're editing as well as a revision to mark
            # responsible for the edits. In hg-experimental it's
            # stateful like this, so we're doing the same thing to
            # retain compatibility with absorb until that's imported.
            ar = self._lastannotate
        else:
            ar = self.annotate(rev)
        if a1 > len(ar.lines):
            raise LineLogError(
                b'%d contains %d lines, tried to access line %d'
                % (rev, len(ar.lines), a1)
            )
        elif a1 == len(ar.lines):
            # Simulated EOF instruction since we're at EOF, which
            # doesn't have a "real" line.
            a1inst = _eof(0, 0)
            a1info = lineinfo(0, 0, ar._eof)
        else:
            a1info = ar.lines[a1]
            a1inst = self._program[a1info._offset]
        programlen = self._program.__len__
        oldproglen = programlen()
        appendinst = self._program.append

        # insert
        blineinfos = []
        bappend = blineinfos.append
        if b1 < b2:
            # Determine the jump target for the JL at the start of
            # the new block.
            tgt = oldproglen + (b2 - b1 + 1)
            # Jump to skip the insert if we're at an older revision.
            appendinst(_jl(rev, tgt))
            for linenum in pycompat.xrange(b1, b2):
                if _internal_blines is None:
                    bappend(lineinfo(rev, linenum, programlen()))
                    appendinst(_line(rev, linenum))
                else:
                    newrev, newlinenum = _internal_blines[linenum]
                    bappend(lineinfo(newrev, newlinenum, programlen()))
                    appendinst(_line(newrev, newlinenum))
        # delete
        if a1 < a2:
            if a2 > len(ar.lines):
                raise LineLogError(
                    b'%d contains %d lines, tried to access line %d'
                    % (rev, len(ar.lines), a2)
                )
            elif a2 == len(ar.lines):
                endaddr = ar._eof
            else:
                endaddr = ar.lines[a2]._offset
            if a2 > 0 and rev < self._maxrev:
                # If we're here, we're deleting a chunk of an old
                # commit, so we need to be careful and not touch
                # invisible lines between a2-1 and a2 (IOW, lines that
                # are added later).
                endaddr = ar.lines[a2 - 1]._offset + 1
            appendinst(_jge(rev, endaddr))
        # copy instruction from a1
        a1instpc = programlen()
        appendinst(a1inst)
        # if a1inst isn't a jump or EOF, then we need to add an unconditional
        # jump back into the program here.
        if not isinstance(a1inst, (_jump, _eof)):
            appendinst(_jump(0, a1info._offset + 1))
        # Patch instruction at a1, which makes our patch live.
        self._program[a1info._offset] = _jump(0, oldproglen)

        # Update self._lastannotate in place. This serves as a cache to avoid
        # expensive "self.annotate" in this function, when "replacelines" is
        # used continuously.
        if len(self._lastannotate.lines) > a1:
            self._lastannotate.lines[a1]._offset = a1instpc
        else:
            assert isinstance(a1inst, _eof)
            self._lastannotate._eof = a1instpc
        self._lastannotate.lines[a1:a2] = blineinfos
        self._lastannotate.rev = max(self._lastannotate.rev, rev)

        if rev > self._maxrev:
            self._maxrev = rev

    def annotate(self, rev):
        """Execute the program for rev and return the annotateresult.

        The result is also remembered as the last annotation.

        Raises:
          LineLogError: if execution does not reach EOF within
            len(program) steps.
        """
        pc = 1
        lines = []
        executed = 0
        # Sanity check: if instructions executed exceeds len(program), we
        # hit an infinite loop in the linelog program somehow and we
        # should stop.
        while pc is not None and executed < len(self._program):
            inst = self._program[pc]
            lastpc = pc
            pc = inst.execute(rev, pc, lines.append)
            executed += 1
        if pc is not None:
            # sysstr() rather than a raw literal so that the message
            # contains a real newline instead of a backslash and an "n".
            raise LineLogError(
                pycompat.sysstr(
                    b'Probably hit an infinite loop in linelog. Program:\n'
                )
                + self.debugstr()
            )
        # lastpc is the offset of the instruction that ended execution.
        ar = annotateresult(rev, lines, lastpc)
        self._lastannotate = ar
        return ar

    @property
    def maxrev(self):
        return self._maxrev

    # Stateful methods which depend on the value of the last
    # annotation run. This API is for compatibility with the original
    # linelog, and we should probably consider refactoring it.
    @property
    def annotateresult(self):
        """Return the last annotation result. C linelog code exposed this."""
        return [(l.rev, l.linenum) for l in self._lastannotate.lines]

    def getoffset(self, line):
        """Return the program offset of a line of the last annotation."""
        return self._lastannotate.lines[line]._offset

    def getalllines(self, start=0, end=0):
        """Get all lines that ever occurred in [start, end).

        Passing start == end == 0 means "all lines ever".

        This works in terms of *internal* program offsets, not line numbers.
        """
        pc = start or 1
        lines = []
        # only take as many steps as there are instructions in the
        # program - if we don't find an EOF or our stop-line before
        # then, something is badly broken.
        for step in pycompat.xrange(len(self._program)):
            inst = self._program[pc]
            nextpc = pc + 1
            if isinstance(inst, _jump):
                nextpc = inst._target
            elif isinstance(inst, _eof):
                return lines
            elif isinstance(inst, (_jl, _jge)):
                # Conditional jumps are never taken here, so lines from
                # every revision are visited.
                pass
            elif isinstance(inst, _line):
                lines.append((inst._rev, inst._origlineno))
            else:
                raise LineLogError(b"Illegal instruction %r" % inst)
            if nextpc == end:
                return lines
            pc = nextpc
        raise LineLogError(b"Failed to perform getalllines")

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # linelog - efficient cache for annotate data
             #
             # Copyright 2018 Google LLC.
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """linelog is an efficient cache for annotate data inspired by SCCS Weaves.
             SCCS Weaves are an implementation of
             https://en.wikipedia.org/wiki/Interleaved_deltas. See
             mercurial/help/internals/linelog.txt for an exploration of SCCS weaves
             and how linelog works in detail.
             Here's a hacker's summary: a linelog is a program which is executed in
             the context of a revision. Executing the program emits information
             about lines, including the revision that introduced them and the line
             number in the file at the introducing revision. When an insertion or
             deletion is performed on the file, a jump instruction is used to patch
             in a new body of annotate information.
             """
             from __future__ import absolute_import, print_function
             import abc
             import struct
             from .thirdparty import attr
             from . import pycompat
             _llentry = struct.Struct(b'>II')
             class LineLogError(Exception):
                 """Signal an internal linelog failure.

                 This module raises it for malformed instructions, corrupt
                 serialized data, out-of-range line accesses and programs that
                 do not terminate.
                 """
             @attr.s
             class lineinfo(object):
                 """Record describing one line emitted while running a linelog.

                 Instances are built positionally as (rev, linenum, offset) by the
                 LINE instruction and by linelog.replacelines.
                 """
                 # Introducing revision of this line.
                 rev = attr.ib()
                 # Line number for this line in its introducing revision.
                 linenum = attr.ib()
                 # Private. Offset in the linelog program of this line. Used internally.
                 _offset = attr.ib()
             @attr.s
             class annotateresult(object):
                 """Result of running a linelog program for one revision.

                 linelog.annotate builds it positionally as (rev, lines, eof) and
                 linelog.replacelines later updates it in place.
                 """
                 # Revision the program was executed for.
                 rev = attr.ib()
                 # List of lineinfo records, in the order they were emitted.
                 lines = attr.ib()
                 # Private. Program offset of the last instruction executed by
                 # annotate; replacelines treats it as the position of the EOF.
                 _eof = attr.ib()
                 def __iter__(self):
                     """Iterate over the lineinfo records in ``lines``."""
                     return iter(self.lines)
-            class _llinstruction(object):
+            class _llinstruction(object):  # pytype: disable=ignored-metaclass
                 __metaclass__ = abc.ABCMeta
                 @abc.abstractmethod
                 def __init__(self, op1, op2):
                     pass
                 @abc.abstractmethod
                 def __str__(self):
                     pass
                 def __repr__(self):
                     return str(self)
                 @abc.abstractmethod
                 def __eq__(self, other):
                     pass
                 @abc.abstractmethod
                 def encode(self):
                     """Encode this instruction to the binary linelog format."""
                 @abc.abstractmethod
                 def execute(self, rev, pc, emit):
                     """Execute this instruction.
                     Args:
                       rev: The revision we're annotating.
                       pc: The current offset in the linelog program.
                       emit: A function that accepts a single lineinfo object.
                     Returns:
                       The new value of pc. Returns None if exeuction should stop
                       (that is, we've found the end of the file.)
                     """
             class _jge(_llinstruction):
                 """Conditional jump: go to op2 when the current rev is >= op1."""

                 def __init__(self, op1, op2):
                     # op1 is the revision compared against, op2 the jump target.
                     self._target = op2
                     self._cmprev = op1

                 def __str__(self):
                     operands = (self._cmprev, self._target)
                     return r'JGE %d %d' % operands

                 def __eq__(self, other):
                     # Only instructions of exactly the same class can be equal.
                     if type(self) != type(other):
                         return False
                     return (
                         self._cmprev == other._cmprev
                         and self._target == other._target
                     )

                 def encode(self):
                     # Opcode 0 lives in the two low bits, the revision above them.
                     firstword = self._cmprev << 2
                     return _llentry.pack(firstword, self._target)

                 def execute(self, rev, pc, emit):
                     # Fall through to the next instruction unless the test passes.
                     return self._target if rev >= self._cmprev else pc + 1
             class _jump(_llinstruction):
                 """Unconditional jump, represented as a JGE whose op1 is 0."""

                 def __init__(self, op1, op2):
                     # The encoding fixes op1 to 0; anything else is malformed.
                     if op1 != 0:
                         msg = b"malformed JUMP, op1 must be 0, got %d" % op1
                         raise LineLogError(msg)
                     self._target = op2

                 def __str__(self):
                     return r'JUMP %d' % (self._target)

                 def __eq__(self, other):
                     # Only instructions of exactly the same class can be equal.
                     if type(self) != type(other):
                         return False
                     return self._target == other._target

                 def encode(self):
                     return _llentry.pack(0, self._target)

                 def execute(self, rev, pc, emit):
                     # The revision is irrelevant: always continue at the target.
                     return self._target
             class _eof(_llinstruction):
                 """End-of-file marker, represented as a JGE with both operands 0."""

                 def __init__(self, op1, op2):
                     # Both operands are fixed by the encoding; reject anything else.
                     if op1 != 0:
                         msg = b"malformed EOF, op1 must be 0, got %d" % op1
                         raise LineLogError(msg)
                     if op2 != 0:
                         msg = b"malformed EOF, op2 must be 0, got %d" % op2
                         raise LineLogError(msg)

                 def __str__(self):
                     return r'EOF'

                 def __eq__(self, other):
                     # EOF carries no state, so matching classes is sufficient.
                     return type(self) == type(other)

                 def encode(self):
                     return _llentry.pack(0, 0)

                 def execute(self, rev, pc, emit):
                     # A None program counter tells the caller to stop executing.
                     return None
             class _jl(_llinstruction):
                 """Conditional jump: go to op2 when the current rev is < op1."""

                 def __init__(self, op1, op2):
                     # op1 is the revision compared against, op2 the jump target.
                     self._target = op2
                     self._cmprev = op1

                 def __str__(self):
                     operands = (self._cmprev, self._target)
                     return r'JL %d %d' % operands

                 def __eq__(self, other):
                     # Only instructions of exactly the same class can be equal.
                     if type(self) != type(other):
                         return False
                     return (
                         self._cmprev == other._cmprev
                         and self._target == other._target
                     )

                 def encode(self):
                     # Opcode 1 lives in the two low bits, the revision above them.
                     firstword = 1 | (self._cmprev << 2)
                     return _llentry.pack(firstword, self._target)

                 def execute(self, rev, pc, emit):
                     # Fall through to the next instruction unless the test passes.
                     return self._target if rev < self._cmprev else pc + 1
             class _line(_llinstruction):
                 """Instruction that emits one line of annotate information."""

                 def __init__(self, op1, op2):
                     # op1: revision that introduced this line.
                     # op2: line number the line had in that introducing revision.
                     self._rev, self._origlineno = op1, op2

                 def __str__(self):
                     operands = (self._rev, self._origlineno)
                     return r'LINE %d %d' % operands

                 def __eq__(self, other):
                     # Only instructions of exactly the same class can be equal.
                     if type(self) != type(other):
                         return False
                     return (
                         self._rev == other._rev
                         and self._origlineno == other._origlineno
                     )

                 def encode(self):
                     # Opcode 2 lives in the two low bits, the revision above them.
                     firstword = 2 | (self._rev << 2)
                     return _llentry.pack(firstword, self._origlineno)

                 def execute(self, rev, pc, emit):
                     # Report the line together with its program offset, then
                     # continue with the following instruction.
                     info = lineinfo(self._rev, self._origlineno, pc)
                     emit(info)
                     return pc + 1
             def _decodeone(data, offset):
                 """Decode the linelog instruction stored at ``offset`` in ``data``.

                 Raises LineLogError when the buffer cannot be unpacked at that
                 offset and NotImplementedError for an unknown opcode.
                 """
                 try:
                     firstword, operand = _llentry.unpack_from(data, offset)
                 except struct.error as e:
                     raise LineLogError(b'reading an instruction failed: %r' % e)
                 # The two low bits select the opcode, the remaining bits are op1.
                 opcode = firstword & 0b11
                 rev = firstword >> 2
                 if opcode == 1:
                     return _jl(rev, operand)
                 if opcode == 2:
                     return _line(rev, operand)
                 if opcode != 0:
                     raise NotImplementedError(b'Unimplemented opcode %r' % opcode)
                 # Opcode 0 is the JGE family: a zero revision makes it a JUMP and
                 # a zero target on top of that makes it an EOF.
                 if rev != 0:
                     return _jge(rev, operand)
                 if operand != 0:
                     return _jump(rev, operand)
                 return _eof(rev, operand)
             class linelog(object):
                 """Efficient cache for per-line history information.

                 The log is a list of instructions (``self._program``) which
                 ``annotate`` executes for a given revision, starting at offset 1.
                 Offset 0 is padding so that offsets match the C implementation.
                 """

                 def __init__(self, program=None, maxrev=0):
                     """Create a linelog.

                     Args:
                       program: List of instructions to adopt (it is not copied).
                         When None, an empty program is created.
                       maxrev: Value reported by the ``maxrev`` property.
                     """
                     if program is None:
                         # We pad the program with an extra leading EOF so that our
                         # offsets will match the C code exactly. This means we can
                         # interoperate with the C code.
                         program = [_eof(0, 0), _eof(0, 0)]
                     self._program = program
                     self._lastannotate = None
                     self._maxrev = maxrev

                 def __eq__(self, other):
                     return (
                         type(self) == type(other)
                         and self._program == other._program
                         and self._maxrev == other._maxrev
                     )

                 def __repr__(self):
                     # __repr__ has to return a native string. The previous bytes
                     # template could not even be formatted on Python 3 because
                     # hex() returns str.
                     return r'<linelog at %s: maxrev=%d size=%d>' % (
                         hex(id(self)),
                         self._maxrev,
                         len(self._program),
                     )

                 def debugstr(self):
                     """Return the program as text, one "offset instruction" per line."""
                     fmt = r'%%%dd %%s' % len(str(len(self._program)))
                     return pycompat.sysstr(b'\n').join(
                         fmt % (idx, i) for idx, i in enumerate(self._program[1:], 1)
                     )

                 @classmethod
                 def fromdata(cls, buf):
                     """Build a linelog from the binary form produced by ``encode``.

                     The first entry of ``buf`` is a header: a jump-like instruction
                     whose revision operand is maxrev and whose target is the number
                     of entries in the buffer.

                     Raises:
                       LineLogError: if the buffer size is not a multiple of the
                         entry size, the header is not a jump-like instruction, or
                         the entry count in the header disagrees with the buffer.
                     """
                     if len(buf) % _llentry.size != 0:
                         raise LineLogError(
                             b"invalid linelog buffer size %d (must be a multiple of %d)"
                             % (len(buf), _llentry.size)
                         )
                     # Floor division keeps the count an integer on Python 3.
                     expected = len(buf) // _llentry.size
                     fakejge = _decodeone(buf, 0)
                     if isinstance(fakejge, _jump):
                         maxrev = 0
                     elif isinstance(fakejge, (_jge, _jl)):
                         maxrev = fakejge._cmprev
                     else:
                         # EOF and LINE have no jump target, so they cannot be a
                         # header; report corrupt data instead of AttributeError.
                         raise LineLogError(
                             b"corrupt linelog data: unexpected header instruction %r"
                             % fakejge
                         )
                     numentries = fakejge._target
                     if expected != numentries:
                         # The header supplies the claimed count; the buffer length
                         # supplies the number of entries actually given.
                         raise LineLogError(
                             b"corrupt linelog data: claimed"
                             b" %d entries but given data for %d entries"
                             % (numentries, expected)
                         )
                     instructions = [_eof(0, 0)]
                     for offset in pycompat.xrange(1, numentries):
                         instructions.append(_decodeone(buf, offset * _llentry.size))
                     return cls(instructions, maxrev=maxrev)

                 def encode(self):
                     """Serialize to bytes: a JGE(maxrev, size) header, then entries 1..n."""
                     hdr = _jge(self._maxrev, len(self._program)).encode()
                     return hdr + b''.join(i.encode() for i in self._program[1:])

                 def clear(self):
                     """Reset to the empty state a freshly constructed linelog has."""
                     # An empty list would make every later annotate() fail, since
                     # execution starts at offset 1. Rebuild the padded empty
                     # program that __init__ creates instead.
                     self._program = [_eof(0, 0), _eof(0, 0)]
                     self._maxrev = 0
                     self._lastannotate = None

                 def replacelines_vec(self, rev, a1, a2, blines):
                     """Replace lines [a1, a2) with ``blines``.

                     ``blines`` is a sequence of (rev, linenum) pairs, one per new
                     line, recorded as given instead of being derived from ``rev``.
                     """
                     return self.replacelines(
                         rev, a1, a2, 0, len(blines), _internal_blines=blines
                     )

                 def replacelines(self, rev, a1, a2, b1, b2, _internal_blines=None):
                     """Replace lines [a1, a2) with lines [b1, b2).

                     a1 and a2 index the lines of the last annotate result (or of
                     ``annotate(rev)`` when there is none). The new lines are
                     recorded as introduced by ``rev`` with line numbers b1..b2-1,
                     unless ``_internal_blines`` supplies explicit (rev, linenum)
                     pairs indexed by those numbers.

                     Raises:
                       LineLogError: if a1 or a2 is past the end of the annotated
                         lines. The program is left untouched in that case.
                     """
                     if self._lastannotate:
                         # TODO(augie): make replacelines() accept a revision at
                         # which we're editing as well as a revision to mark
                         # responsible for the edits. In hg-experimental it's
                         # stateful like this, so we're doing the same thing to
                         # retain compatibility with absorb until that's imported.
                         ar = self._lastannotate
                     else:
                         ar = self.annotate(rev)
                     if a1 > len(ar.lines):
                         raise LineLogError(
                             b'%d contains %d lines, tried to access line %d'
                             % (rev, len(ar.lines), a1)
                         )
                     # Validate the end of the deleted range before appending any
                     # instruction so that a failed call does not mutate the program.
                     if a1 < a2 and a2 > len(ar.lines):
                         raise LineLogError(
                             b'%d contains %d lines, tried to access line %d'
                             % (rev, len(ar.lines), a2)
                         )
                     if a1 == len(ar.lines):
                         # Simulated EOF instruction since we're at EOF, which
                         # doesn't have a "real" line.
                         a1inst = _eof(0, 0)
                         a1info = lineinfo(0, 0, ar._eof)
                     else:
                         a1info = ar.lines[a1]
                         a1inst = self._program[a1info._offset]
                     programlen = self._program.__len__
                     oldproglen = programlen()
                     appendinst = self._program.append

                     # insert
                     blineinfos = []
                     bappend = blineinfos.append
                     if b1 < b2:
                         # Determine the jump target for the JGE at the start of
                         # the new block.
                         tgt = oldproglen + (b2 - b1 + 1)
                         # Jump to skip the insert if we're at an older revision.
                         appendinst(_jl(rev, tgt))
                         for linenum in pycompat.xrange(b1, b2):
                             if _internal_blines is None:
                                 bappend(lineinfo(rev, linenum, programlen()))
                                 appendinst(_line(rev, linenum))
                             else:
                                 newrev, newlinenum = _internal_blines[linenum]
                                 bappend(lineinfo(newrev, newlinenum, programlen()))
                                 appendinst(_line(newrev, newlinenum))

                     # delete
                     if a1 < a2:
                         if a2 == len(ar.lines):
                             endaddr = ar._eof
                         else:
                             endaddr = ar.lines[a2]._offset
                         if a2 > 0 and rev < self._maxrev:
                             # If we're here, we're deleting a chunk of an old
                             # commit, so we need to be careful and not touch
                             # invisible lines between a2-1 and a2 (IOW, lines that
                             # are added later).
                             endaddr = ar.lines[a2 - 1]._offset + 1
                         appendinst(_jge(rev, endaddr))

                     # copy instruction from a1
                     a1instpc = programlen()
                     appendinst(a1inst)
                     # if a1inst isn't a jump or EOF, then we need to add an unconditional
                     # jump back into the program here.
                     if not isinstance(a1inst, (_jump, _eof)):
                         appendinst(_jump(0, a1info._offset + 1))
                     # Patch instruction at a1, which makes our patch live.
                     self._program[a1info._offset] = _jump(0, oldproglen)

                     # Update self._lastannotate in place. This serves as a cache to avoid
                     # expensive "self.annotate" in this function, when "replacelines" is
                     # used continuously.
                     if len(self._lastannotate.lines) > a1:
                         self._lastannotate.lines[a1]._offset = a1instpc
                     else:
                         assert isinstance(a1inst, _eof)
                         self._lastannotate._eof = a1instpc
                     self._lastannotate.lines[a1:a2] = blineinfos
                     self._lastannotate.rev = max(self._lastannotate.rev, rev)

                     if rev > self._maxrev:
                         self._maxrev = rev

                 def annotate(self, rev):
                     """Execute the program for ``rev`` and return an annotateresult.

                     The result is also remembered as the "last annotation" used by
                     the stateful methods of this class.

                     Raises:
                       LineLogError: if the program has not reached EOF after as
                         many steps as it has instructions.
                     """
                     pc = 1
                     lines = []
                     executed = 0
                     # Sanity check: if instructions executed exceeds len(program), we
                     # hit an infinite loop in the linelog program somehow and we
                     # should stop.
                     while pc is not None and executed < len(self._program):
                         inst = self._program[pc]
                         lastpc = pc
                         pc = inst.execute(rev, pc, lines.append)
                         executed += 1
                     if pc is not None:
                         # A raw string here used to emit a literal backslash-n
                         # instead of a line break before the program dump.
                         raise LineLogError(
                             pycompat.sysstr(
                                 b'Probably hit an infinite loop in linelog. Program:\n'
                             )
                             + self.debugstr()
                         )
                     ar = annotateresult(rev, lines, lastpc)
                     self._lastannotate = ar
                     return ar

                 @property
                 def maxrev(self):
                     return self._maxrev

                 # Stateful methods which depend on the value of the last
                 # annotation run. This API is for compatibility with the original
                 # linelog, and we should probably consider refactoring it.
                 @property
                 def annotateresult(self):
                     """Return the last annotation result. C linelog code exposed this."""
                     return [(l.rev, l.linenum) for l in self._lastannotate.lines]

                 def getoffset(self, line):
                     """Return the program offset of ``line`` in the last annotation."""
                     return self._lastannotate.lines[line]._offset

                 def getalllines(self, start=0, end=0):
                     """Get all lines that ever occurred in [start, end).

                     Passing start == end == 0 means "all lines ever".

                     This works in terms of *internal* program offsets, not line numbers.

                     Returns a list of (rev, linenum) pairs. Raises LineLogError on an
                     unknown instruction, or when neither an EOF nor ``end`` is
                     reached within len(program) steps.
                     """
                     pc = start or 1
                     lines = []
                     # only take as many steps as there are instructions in the
                     # program - if we don't find an EOF or our stop-line before
                     # then, something is badly broken.
                     for step in pycompat.xrange(len(self._program)):
                         inst = self._program[pc]
                         nextpc = pc + 1
                         if isinstance(inst, _jump):
                             nextpc = inst._target
                         elif isinstance(inst, _eof):
                             return lines
                         elif isinstance(inst, (_jl, _jge)):
                             # Conditional jumps are never taken, so lines from
                             # every revision are visited.
                             pass
                         elif isinstance(inst, _line):
                             lines.append((inst._rev, inst._origlineno))
                         else:
                             raise LineLogError(b"Illegal instruction %r" % inst)
                         if nextpc == end:
                             return lines
                         pc = nextpc
                     raise LineLogError(b"Failed to perform getalllines")