upstream/mercurial-mirror Commit - r52188:011eec5a

1

# linelog - efficient cache for annotate data

1

# linelog - efficient cache for annotate data

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

"""linelog is an efficient cache for annotate data inspired by SCCS Weaves.

SCCS Weaves are an implementation of
https://en.wikipedia.org/wiki/Interleaved_deltas. See
mercurial/helptext/internals/linelog.txt for an exploration of SCCS weaves
and how linelog works in detail.

Here's a hacker's summary: a linelog is a program which is executed in
the context of a revision. Executing the program emits information
about lines, including the revision that introduced them and the line
number in the file at the introducing revision. When an insertion or
deletion is performed on the file, a jump instruction is used to patch
in a new body of annotate information.
"""

21

22

import abc

22

import abc

23

import struct

23

import struct

24

25

from .thirdparty import attr

25

from .thirdparty import attr

26

from . import pycompat

26

from . import pycompat

27

28

_llentry = struct.Struct(b'>II')

28

_llentry = struct.Struct(b'>II')

29

30

31

class LineLogError(Exception):
    """Raised when linelog hits an internal problem (bad data, bad state)."""

33

34

35

@attr.s
class lineinfo:
    """Annotate record describing one emitted line.

    Field order matters: instances are built positionally as
    ``lineinfo(rev, linenum, offset)``.
    """

    # Revision that introduced this line.
    rev = attr.ib()
    # Number this line had in the revision that introduced it.
    linenum = attr.ib()
    # Private: position of this line's instruction inside the linelog
    # program. Only meant for internal bookkeeping.
    _offset = attr.ib()

43

44

45

@attr.s
class annotateresult:
    """Outcome of annotating one revision; iterating yields its lines."""

    # Revision the annotation was computed for.
    rev = attr.ib()
    # The emitted lineinfo entries, in file order.
    # NOTE(review): annotate() passes a plain list here even though the
    # declared type is bytearray -- confirm the intended type.
    lines = attr.ib(type=bytearray)
    # Private: program offset of the instruction that ended the annotation.
    _eof = attr.ib()

    def __iter__(self):
        entries = self.lines
        return iter(entries)

53

54

55

class _llinstruction: # pytype: disable=ignored-metaclass

55

class _llinstruction: # pytype: disable=ignored-metaclass

56

57

__metaclass__ = abc.ABCMeta

57

__metaclass__ = abc.ABCMeta

58

59

@abc.abstractmethod

59

@abc.abstractmethod

60

def __init__(self, op1, op2):

60

def __init__(self, op1, op2):

61

pass

61

pass

62

63

@abc.abstractmethod

63

@abc.abstractmethod

64

def __str__(self):

64

def __str__(self):

65

pass

65

pass

66

67

def __repr__(self):

67

def __repr__(self):

68

return str(self)

68

return str(self)

69

70

@abc.abstractmethod

70

@abc.abstractmethod

71

def __eq__(self, other):

71

def __eq__(self, other):

72

pass

72

pass

73

74

@abc.abstractmethod

74

@abc.abstractmethod

75

def encode(self):

75

def encode(self):

76

"""Encode this instruction to the binary linelog format."""

76

"""Encode this instruction to the binary linelog format."""

77

78

@abc.abstractmethod

78

@abc.abstractmethod

79

def execute(self, rev, pc, emit):

79

def execute(self, rev, pc, emit):

80

"""Execute this instruction.

80

"""Execute this instruction.

81

82

Args:

82

Args:

83

rev: The revision we're annotating.

83

rev: The revision we're annotating.

84

pc: The current offset in the linelog program.

84

pc: The current offset in the linelog program.

85

emit: A function that accepts a single lineinfo object.

85

emit: A function that accepts a single lineinfo object.

86

87

Returns:

87

Returns:

88

The new value of pc. Returns None if exeuction should stop

88

The new value of pc. Returns None if exeuction should stop

89

(that is, we've found the end of the file.)

89

(that is, we've found the end of the file.)

90

"""

90

"""

91

92

93

class _jge(_llinstruction):
    """Conditional jump: go to op2 when the current rev is >= op1."""

    def __init__(self, op1, op2):
        # op1 is the revision to compare against, op2 the jump destination.
        self._cmprev, self._target = op1, op2

    def __str__(self):
        return 'JGE %d %d' % (self._cmprev, self._target)

    def __eq__(self, other):
        if type(self) != type(other):
            return False
        mine = (self._cmprev, self._target)
        theirs = (other._cmprev, other._target)
        return mine == theirs

    def encode(self):
        # The low two bits of the first word hold the opcode (0 here); the
        # comparison revision sits above them.
        firstword = self._cmprev << 2
        return _llentry.pack(firstword, self._target)

    def execute(self, rev, pc, emit):
        # Take the jump for revs at or after the comparison revision,
        # otherwise fall through to the next instruction.
        return self._target if rev >= self._cmprev else pc + 1

117

118

119

class _jump(_llinstruction):
    """Unconditional jump, represented as a JGE whose op1 is 0."""

    def __init__(self, op1, op2):
        # A non-zero op1 would make this a real JGE, not a JUMP.
        if op1 != 0:
            raise LineLogError(b"malformed JUMP, op1 must be 0, got %d" % op1)
        self._target = op2

    def __str__(self):
        return 'JUMP %d' % (self._target)

    def __eq__(self, other):
        if type(self) != type(other):
            return False
        return self._target == other._target

    def encode(self):
        destination = self._target
        return _llentry.pack(0, destination)

    def execute(self, rev, pc, emit):
        # The revision is irrelevant: always continue at the target.
        return self._target

138

139

140

class _eof(_llinstruction):
    """End of file, represented as a JGE with both operands set to 0."""

    def __init__(self, op1, op2):
        # Both operands must be zero; anything else is a different
        # instruction or corrupt data.
        if op1 != 0:
            raise LineLogError(b"malformed EOF, op1 must be 0, got %d" % op1)
        if op2 != 0:
            raise LineLogError(b"malformed EOF, op2 must be 0, got %d" % op2)

    def __str__(self):
        return 'EOF'

    def __eq__(self, other):
        # EOF carries no state, so two EOFs of the same type are equal.
        sametype = type(self) == type(other)
        return sametype

    def encode(self):
        return _llentry.pack(0, 0)

    def execute(self, rev, pc, emit):
        # None tells the interpreter loop to stop.
        return None

160

161

162

class _jl(_llinstruction):
    """Conditional jump: go to op2 when the current rev is < op1."""

    def __init__(self, op1, op2):
        # op1 is the revision to compare against, op2 the jump destination.
        self._cmprev, self._target = op1, op2

    def __str__(self):
        return 'JL %d %d' % (self._cmprev, self._target)

    def __eq__(self, other):
        if type(self) != type(other):
            return False
        mine = (self._cmprev, self._target)
        theirs = (other._cmprev, other._target)
        return mine == theirs

    def encode(self):
        # Opcode 1 occupies the low two bits; the comparison revision is
        # stored above them.
        firstword = (self._cmprev << 2) | 1
        return _llentry.pack(firstword, self._target)

    def execute(self, rev, pc, emit):
        # Take the jump for revs before the comparison revision, otherwise
        # fall through to the next instruction.
        return self._target if rev < self._cmprev else pc + 1

186

187

188

class _line(_llinstruction):
    """Instruction that emits one line of annotate information."""

    def __init__(self, op1, op2):
        # op1: revision number that introduced this line.
        # op2: line number the line had in that introducing revision.
        self._rev, self._origlineno = op1, op2

    def __str__(self):
        return 'LINE %d %d' % (self._rev, self._origlineno)

    def __eq__(self, other):
        if type(self) != type(other):
            return False
        mine = (self._rev, self._origlineno)
        theirs = (other._rev, other._origlineno)
        return mine == theirs

    def encode(self):
        # Opcode 2 occupies the low two bits; the introducing revision is
        # stored above them.
        firstword = (self._rev << 2) | 2
        return _llentry.pack(firstword, self._origlineno)

    def execute(self, rev, pc, emit):
        # Report the line, tagged with where it lives in the program, then
        # move on to the next instruction.
        info = lineinfo(self._rev, self._origlineno, pc)
        emit(info)
        return pc + 1

213

214

215

def _decodeone(data, offset):
    """Decode the linelog instruction stored at ``offset`` in ``data``.

    The low two bits of the first word select the opcode and the remaining
    bits are the first operand. Opcode 0 is shared by EOF, JUMP and JGE,
    which are told apart by which operands are zero.

    Raises LineLogError if the buffer cannot be unpacked at ``offset`` and
    NotImplementedError for an opcode with no matching instruction.
    """
    try:
        word, op2 = _llentry.unpack_from(data, offset)
    except struct.error as e:
        raise LineLogError(b'reading an instruction failed: %r' % e)
    opcode, op1 = word & 0b11, word >> 2
    if opcode == 2:
        return _line(op1, op2)
    if opcode == 1:
        return _jl(op1, op2)
    if opcode != 0:
        raise NotImplementedError(b'Unimplemented opcode %r' % opcode)
    # Opcode 0 from here on: disambiguate by the operands.
    if op1 != 0:
        return _jge(op1, op2)
    if op2 != 0:
        return _jump(op1, op2)
    return _eof(op1, op2)

234

235

236

class linelog:
    """Efficient cache for per-line history information.

    The linelog is a list of instructions (``self._program``) that is
    interpreted for a given revision by ``annotate``. Edits are recorded
    with ``replacelines``, which appends new instructions and patches a
    jump into the existing program.
    """

    def __init__(self, program=None, maxrev=0):
        """Create a linelog from ``program``, or an empty one if None."""
        if program is None:
            # We pad the program with an extra leading EOF so that our
            # offsets will match the C code exactly. This means we can
            # interoperate with the C code.
            program = [_eof(0, 0), _eof(0, 0)]
        self._program = program
        self._lastannotate = None
        self._maxrev = maxrev

    def __eq__(self, other):
        return (
            type(self) == type(other)
            and self._program == other._program
            and self._maxrev == other._maxrev
        )

    def __repr__(self):
        return '<linelog at %s: maxrev=%d size=%d>' % (
            hex(id(self)),
            self._maxrev,
            len(self._program),
        )

    def debugstr(self):
        """Return a listing of the program, one instruction per line.

        The padding instruction at offset 0 is not listed.
        """
        fmt = '%%%dd %%s' % len(str(len(self._program)))
        return pycompat.sysstr(b'\n').join(
            fmt % (idx, i) for idx, i in enumerate(self._program[1:], 1)
        )

    @classmethod
    def fromdata(cls, buf):
        """Build a linelog from its binary encoding.

        The first entry of ``buf`` is a header: its jump target is the
        number of entries and, for a JGE/JL, its comparison revision is
        maxrev.

        Raises LineLogError if the buffer size is not a multiple of the
        entry size, if the header is not a jump, or if the header's entry
        count disagrees with the buffer length.
        """
        if len(buf) % _llentry.size != 0:
            raise LineLogError(
                b"invalid linelog buffer size %d (must be a multiple of %d)"
                % (len(buf), _llentry.size)
            )
        # Integer division: the size was checked to divide evenly above,
        # and the count must stay an int.
        expected = len(buf) // _llentry.size
        fakejge = _decodeone(buf, 0)
        if isinstance(fakejge, _jump):
            maxrev = 0
        elif isinstance(fakejge, (_jge, _jl)):
            maxrev = fakejge._cmprev
        else:
            raise LineLogError(
                'Expected one of _jump, _jge, or _jl. Got %s.'
                % type(fakejge).__name__
            )
        assert isinstance(fakejge, (_jump, _jge, _jl))  # help pytype
        numentries = fakejge._target
        if expected != numentries:
            # The header claims numentries; the buffer holds expected.
            raise LineLogError(
                b"corrupt linelog data: claimed"
                b" %d entries but given data for %d entries"
                % (numentries, expected)
            )
        instructions = [_eof(0, 0)]
        for offset in range(1, numentries):
            instructions.append(_decodeone(buf, offset * _llentry.size))
        return cls(instructions, maxrev=maxrev)

    def encode(self):
        """Return the binary encoding of this linelog.

        Slot 0 is written as a JGE header carrying maxrev and the program
        length instead of the padding instruction stored there.
        """
        hdr = _jge(self._maxrev, len(self._program)).encode()
        return hdr + b''.join(i.encode() for i in self._program[1:])

    def clear(self):
        """Reset this linelog to the empty state a new linelog() has."""
        # Keep the same two-EOF layout as __init__: annotate() starts at
        # offset 1, so an empty list would leave the linelog unusable.
        self._program = [_eof(0, 0), _eof(0, 0)]
        self._maxrev = 0
        self._lastannotate = None

    def replacelines_vec(self, rev, a1, a2, blines):
        """Replace lines [a1, a2) with the (rev, linenum) pairs in blines."""
        return self.replacelines(
            rev, a1, a2, 0, len(blines), _internal_blines=blines
        )

    def replacelines(self, rev, a1, a2, b1, b2, _internal_blines=None):
        """Replace lines [a1, a2) with lines [b1, b2).

        Args:
          rev: The revision recorded on the inserted lines and used in the
            conditional jumps guarding the edit.
          a1, a2: Range of lines, in the last annotate result, to replace.
          b1, b2: Range of line numbers to insert.
          _internal_blines: Optional sequence indexed by line number that
            yields (rev, linenum) pairs to record instead of (rev, linenum)
            taken from the arguments.

        Raises:
          LineLogError: if a1 or a2 is past the end of the annotated lines.
        """
        if self._lastannotate:
            # TODO(augie): make replacelines() accept a revision at
            # which we're editing as well as a revision to mark
            # responsible for the edits. In hg-experimental it's
            # stateful like this, so we're doing the same thing to
            # retain compatibility with absorb until that's imported.
            ar = self._lastannotate
        else:
            ar = self.annotate(rev)
            #        ar = self.annotate(self._maxrev)
        if a1 > len(ar.lines):
            raise LineLogError(
                b'%d contains %d lines, tried to access line %d'
                % (rev, len(ar.lines), a1)
            )
        elif a1 == len(ar.lines):
            # Simulated EOF instruction since we're at EOF, which
            # doesn't have a "real" line.
            a1inst = _eof(0, 0)
            a1info = lineinfo(0, 0, ar._eof)
        else:
            a1info = ar.lines[a1]
            a1inst = self._program[a1info._offset]
        programlen = self._program.__len__
        oldproglen = programlen()
        appendinst = self._program.append

        # insert
        blineinfos = []
        bappend = blineinfos.append
        if b1 < b2:
            # Determine the jump target for the JGE at the start of
            # the new block.
            tgt = oldproglen + (b2 - b1 + 1)
            # Jump to skip the insert if we're at an older revision.
            appendinst(_jl(rev, tgt))
            for linenum in range(b1, b2):
                if _internal_blines is None:
                    bappend(lineinfo(rev, linenum, programlen()))
                    appendinst(_line(rev, linenum))
                else:
                    newrev, newlinenum = _internal_blines[linenum]
                    bappend(lineinfo(newrev, newlinenum, programlen()))
                    appendinst(_line(newrev, newlinenum))
        # delete
        if a1 < a2:
            if a2 > len(ar.lines):
                raise LineLogError(
                    b'%d contains %d lines, tried to access line %d'
                    % (rev, len(ar.lines), a2)
                )
            elif a2 == len(ar.lines):
                endaddr = ar._eof
            else:
                endaddr = ar.lines[a2]._offset
            if a2 > 0 and rev < self._maxrev:
                # If we're here, we're deleting a chunk of an old
                # commit, so we need to be careful and not touch
                # invisible lines between a2-1 and a2 (IOW, lines that
                # are added later).
                endaddr = ar.lines[a2 - 1]._offset + 1
            appendinst(_jge(rev, endaddr))
        # copy instruction from a1
        a1instpc = programlen()
        appendinst(a1inst)
        # if a1inst isn't a jump or EOF, then we need to add an unconditional
        # jump back into the program here.
        if not isinstance(a1inst, (_jump, _eof)):
            appendinst(_jump(0, a1info._offset + 1))
        # Patch instruction at a1, which makes our patch live.
        self._program[a1info._offset] = _jump(0, oldproglen)

        # Update self._lastannotate in place. This serves as a cache to avoid
        # expensive "self.annotate" in this function, when "replacelines" is
        # used continuously.
        if len(self._lastannotate.lines) > a1:
            self._lastannotate.lines[a1]._offset = a1instpc
        else:
            assert isinstance(a1inst, _eof)
            self._lastannotate._eof = a1instpc
        self._lastannotate.lines[a1:a2] = blineinfos
        self._lastannotate.rev = max(self._lastannotate.rev, rev)

        if rev > self._maxrev:
            self._maxrev = rev

    def annotate(self, rev):
        """Execute the program for ``rev`` and return an annotateresult.

        The result is also remembered as the last annotation, which the
        stateful methods below and ``replacelines`` rely on.

        Raises LineLogError if execution does not reach an EOF within
        len(program) steps.
        """
        pc = 1
        lines = []
        executed = 0
        # Sanity check: if instructions executed exceeds len(program), we
        # hit an infinite loop in the linelog program somehow and we
        # should stop.
        while pc is not None and executed < len(self._program):
            inst = self._program[pc]
            lastpc = pc
            pc = inst.execute(rev, pc, lines.append)
            executed += 1
        if pc is not None:
            # Plain (non-raw) string so that "\n" is a real newline before
            # the program listing.
            raise LineLogError(
                'Probably hit an infinite loop in linelog. Program:\n'
                + self.debugstr()
            )
        ar = annotateresult(rev, lines, lastpc)
        self._lastannotate = ar
        return ar

    @property
    def maxrev(self):
        return self._maxrev

    # Stateful methods which depend on the value of the last
    # annotation run. This API is for compatibility with the original
    # linelog, and we should probably consider refactoring it.
    @property
    def annotateresult(self):
        """Return the last annotation result. C linelog code exposed this."""
        return [(l.rev, l.linenum) for l in self._lastannotate.lines]

    def getoffset(self, line):
        """Return the program offset of ``line`` in the last annotation."""
        return self._lastannotate.lines[line]._offset

    def getalllines(self, start=0, end=0):
        """Get all lines that ever occurred in [start, end).

        Passing start == end == 0 means "all lines ever".

        This works in terms of *internal* program offsets, not line numbers.
        """
        pc = start or 1
        lines = []
        # only take as many steps as there are instructions in the
        # program - if we don't find an EOF or our stop-line before
        # then, something is badly broken.
        for step in range(len(self._program)):
            inst = self._program[pc]
            nextpc = pc + 1
            if isinstance(inst, _jump):
                nextpc = inst._target
            elif isinstance(inst, _eof):
                return lines
            elif isinstance(inst, (_jl, _jge)):
                # Conditional jumps are never taken here, so lines from
                # every revision are visited.
                pass
            elif isinstance(inst, _line):
                lines.append((inst._rev, inst._origlineno))
            else:
                raise LineLogError(b"Illegal instruction %r" % inst)
            if nextpc == end:
                return lines
            pc = nextpc
        raise LineLogError(b"Failed to perform getalllines")

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # linelog - efficient cache for annotate data
             #
             # Copyright 2018 Google LLC.
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """linelog is an efficient cache for annotate data inspired by SCCS Weaves.
             SCCS Weaves are an implementation of
             https://en.wikipedia.org/wiki/Interleaved_deltas. See
             mercurial/helptext/internals/linelog.txt for an exploration of SCCS weaves
             and how linelog works in detail.
             Here's a hacker's summary: a linelog is a program which is executed in
             the context of a revision. Executing the program emits information
             about lines, including the revision that introduced them and the line
             number in the file at the introducing revision. When an insertion or
             deletion is performed on the file, a jump instruction is used to patch
             in a new body of annotate information.
             """
             import abc
             import struct
             from .thirdparty import attr
             from . import pycompat
             # Binary layout of one linelog entry: two big-endian unsigned 32-bit
             # integers, which the decoder below reads as (op1, op2).
             _llentry = struct.Struct(b'>II')
             class LineLogError(Exception):
                 """Raised when linelog meets malformed data or an internal inconsistency."""
             @attr.s
             class lineinfo:
                 """Annotation record for one line emitted by a linelog program."""
                 # Revision that introduced this line.
                 rev = attr.ib()
                 # Line number this line had in its introducing revision.
                 linenum = attr.ib()
                 # Private. Offset of this line's instruction in the linelog
                 # program. Used internally.
                 _offset = attr.ib()
             @attr.s
             class annotateresult:
                 rev = attr.ib()
-                lines = attr.ib()
+                lines = attr.ib(type=bytearray)
                 _eof = attr.ib()
                 def __iter__(self):
                     return iter(self.lines)
             class _llinstruction:  # pytype: disable=ignored-metaclass
                 __metaclass__ = abc.ABCMeta
                 @abc.abstractmethod
                 def __init__(self, op1, op2):
                     pass
                 @abc.abstractmethod
                 def __str__(self):
                     pass
                 def __repr__(self):
                     return str(self)
                 @abc.abstractmethod
                 def __eq__(self, other):
                     pass
                 @abc.abstractmethod
                 def encode(self):
                     """Encode this instruction to the binary linelog format."""
                 @abc.abstractmethod
                 def execute(self, rev, pc, emit):
                     """Execute this instruction.
                     Args:
                       rev: The revision we're annotating.
                       pc: The current offset in the linelog program.
                       emit: A function that accepts a single lineinfo object.
                     Returns:
                       The new value of pc. Returns None if exeuction should stop
                       (that is, we've found the end of the file.)
                     """
             class _jge(_llinstruction):
                 """Conditional jump: go to op2 when the current rev is >= op1."""
                 def __init__(self, op1, op2):
                     self._cmprev, self._target = op1, op2
                 def __str__(self):
                     operands = (self._cmprev, self._target)
                     return 'JGE %d %d' % operands
                 def __eq__(self, other):
                     same = type(self) == type(other)
                     same = same and self._cmprev == other._cmprev
                     same = same and self._target == other._target
                     return same
                 def encode(self):
                     # The two low bits of the first word carry the opcode (0 for
                     # JGE), so the revision is shifted above them.
                     shiftedrev = self._cmprev << 2
                     return _llentry.pack(shiftedrev, self._target)
                 def execute(self, rev, pc, emit):
                     return self._target if rev >= self._cmprev else pc + 1
             class _jump(_llinstruction):
                 """Unconditional jump to op2, expressed as a JGE whose op1 is 0."""
                 def __init__(self, op1, op2):
                     # A jump has no revision operand; anything but 0 is malformed.
                     if op1:
                         raise LineLogError(b"malformed JUMP, op1 must be 0, got %d" % op1)
                     self._target = op2
                 def __str__(self):
                     return 'JUMP %d' % self._target
                 def __eq__(self, other):
                     if type(self) == type(other):
                         return self._target == other._target
                     return False
                 def encode(self):
                     destination = self._target
                     return _llentry.pack(0, destination)
                 def execute(self, rev, pc, emit):
                     # Always taken, whatever revision is being annotated.
                     return self._target
             class _eof(_llinstruction):
                 """End-of-file marker, expressed as a JGE with both operands 0."""
                 def __init__(self, op1, op2):
                     # Both operands are fixed at 0; nothing is stored on the instance.
                     if op1:
                         raise LineLogError(b"malformed EOF, op1 must be 0, got %d" % op1)
                     if op2:
                         raise LineLogError(b"malformed EOF, op2 must be 0, got %d" % op2)
                 def __str__(self):
                     return r'EOF'
                 def __eq__(self, other):
                     # EOF has no operands, so matching types is all that is needed.
                     return type(other) == type(self)
                 def encode(self):
                     return _llentry.pack(0, 0)
                 def execute(self, rev, pc, emit):
                     # None is the "stop executing" signal for the caller's loop.
                     return None
             class _jl(_llinstruction):
                 """Conditional jump: go to op2 when the current rev is < op1."""
                 def __init__(self, op1, op2):
                     self._cmprev, self._target = op1, op2
                 def __str__(self):
                     operands = (self._cmprev, self._target)
                     return 'JL %d %d' % operands
                 def __eq__(self, other):
                     same = type(self) == type(other)
                     same = same and self._cmprev == other._cmprev
                     same = same and self._target == other._target
                     return same
                 def encode(self):
                     # Opcode 1 lives in the two low bits, the revision above them.
                     firstword = (self._cmprev << 2) | 1
                     return _llentry.pack(firstword, self._target)
                 def execute(self, rev, pc, emit):
                     return self._target if rev < self._cmprev else pc + 1
             class _line(_llinstruction):
                 """Emit one line of annotate information."""
                 def __init__(self, op1, op2):
                     # op1: revision that introduced the line.
                     # op2: the line's number in that introducing revision.
                     self._rev, self._origlineno = op1, op2
                 def __str__(self):
                     operands = (self._rev, self._origlineno)
                     return 'LINE %d %d' % operands
                 def __eq__(self, other):
                     same = type(self) == type(other)
                     same = same and self._rev == other._rev
                     same = same and self._origlineno == other._origlineno
                     return same
                 def encode(self):
                     # Opcode 2 lives in the two low bits, the revision above them.
                     firstword = (self._rev << 2) | 2
                     return _llentry.pack(firstword, self._origlineno)
                 def execute(self, rev, pc, emit):
                     # Record where in the program this line lives, then fall through.
                     info = lineinfo(self._rev, self._origlineno, pc)
                     emit(info)
                     return pc + 1
             def _decodeone(data, offset):
                 """Decode the linelog instruction stored at ``offset`` in ``data``.

                 The two low bits of the first word select the opcode, the remaining
                 bits are op1, and the second word is op2. Raises LineLogError when
                 the entry cannot be unpacked and NotImplementedError for an opcode
                 other than 0, 1 or 2.
                 """
                 try:
                     firstword, op2 = _llentry.unpack_from(data, offset)
                 except struct.error as e:
                     raise LineLogError(b'reading an instruction failed: %r' % e)
                 # Split off the 2-bit opcode; the quotient is the real op1.
                 op1, opcode = divmod(firstword, 4)
                 if opcode == 1:
                     return _jl(op1, op2)
                 if opcode == 2:
                     return _line(op1, op2)
                 if opcode != 0:
                     raise NotImplementedError(b'Unimplemented opcode %r' % opcode)
                 # Opcode 0 is JGE; JUMP (op1 == 0) and EOF (both 0) are special cases.
                 if op1 != 0:
                     return _jge(op1, op2)
                 if op2 != 0:
                     return _jump(op1, op2)
                 return _eof(op1, op2)
             class linelog:
                 """Efficient cache for per-line history information."""
                 def __init__(self, program=None, maxrev=0):
                     if program is None:
                         # We pad the program with an extra leading EOF so that our
                         # offsets will match the C code exactly. This means we can
                         # interoperate with the C code.
                         program = [_eof(0, 0), _eof(0, 0)]
                     self._program = program
                     self._lastannotate = None
                     self._maxrev = maxrev
                 def __eq__(self, other):
                     return (
                         type(self) == type(other)
                         and self._program == other._program
                         and self._maxrev == other._maxrev
                     )
                 def __repr__(self):
                     return '<linelog at %s: maxrev=%d size=%d>' % (
                         hex(id(self)),
                         self._maxrev,
                         len(self._program),
                     )
                 def debugstr(self):
                     """Render the program, one numbered instruction per line.

                     The padding entry at offset 0 is skipped.
                     """
                     # Pad offsets to the width of the largest one so columns line up.
                     width = len(str(len(self._program)))
                     template = '%%%dd %%s' % width
                     rows = (
                         template % (pos, inst)
                         for pos, inst in enumerate(self._program[1:], 1)
                     )
                     return pycompat.sysstr(b'\n').join(rows)
                 @classmethod
                 def fromdata(cls, buf):
                     """Build a linelog from its binary encoding.

                     The first entry is a header rather than a real instruction: its
                     jump target holds the number of entries and, for JGE/JL, its
                     compared revision holds maxrev (a plain JUMP means maxrev 0).

                     Args:
                       buf: The encoded linelog. Its length must be a multiple of
                         the entry size.

                     Returns:
                       A new instance of cls holding the decoded program.

                     Raises:
                       LineLogError: If the buffer size is invalid, the header is
                         not a jump-style instruction, or the entry count claimed
                         by the header disagrees with the amount of data given.
                     """
                     if len(buf) % _llentry.size != 0:
                         raise LineLogError(
                             b"invalid linelog buffer size %d (must be a multiple of %d)"
                             % (len(buf), _llentry.size)
                         )
                     # Floor division keeps the count an int; the check above
                     # guarantees the division is exact.
                     given = len(buf) // _llentry.size
                     header = _decodeone(buf, 0)
                     if isinstance(header, _jump):
                         maxrev = 0
                     elif isinstance(header, (_jge, _jl)):
                         maxrev = header._cmprev
                     else:
                         raise LineLogError(
                             'Expected one of _jump, _jge, or _jl. Got %s.'
                             % type(header).__name__
                         )
                     assert isinstance(header, (_jump, _jge, _jl))  # help pytype
                     numentries = header._target
                     if given != numentries:
                         # "claimed" is what the header says, "given" is what the
                         # buffer actually holds.
                         raise LineLogError(
                             b"corrupt linelog data: claimed"
                             b" %d entries but given data for %d entries"
                             % (numentries, given)
                         )
                     # Offset 0 held the header; put the usual EOF padding there.
                     instructions = [_eof(0, 0)]
                     for offset in range(1, numentries):
                         instructions.append(_decodeone(buf, offset * _llentry.size))
                     return cls(instructions, maxrev=maxrev)
                 def encode(self):
                     """Serialize the program to the binary linelog format."""
                     # Offset 0 is written as a header carrying maxrev and the
                     # program length instead of the padding instruction stored there.
                     header = _jge(self._maxrev, len(self._program))
                     chunks = [header.encode()]
                     chunks.extend(inst.encode() for inst in self._program[1:])
                     return b''.join(chunks)
                 def clear(self):
                     """Reset this linelog to the state of a freshly created one."""
                     # An empty list would leave no instruction at offset 1, where
                     # annotate() starts executing, so restore the padded two-EOF
                     # program that __init__ builds for an empty linelog.
                     self._program = [_eof(0, 0), _eof(0, 0)]
                     self._maxrev = 0
                     self._lastannotate = None
                 def replacelines_vec(self, rev, a1, a2, blines):
                     """Replace lines [a1, a2) with the entries listed in ``blines``.

                     Every element of ``blines`` is forwarded to replacelines(), which
                     unpacks each one as a (rev, linenum) pair.
                     """
                     b1, b2 = 0, len(blines)
                     return self.replacelines(
                         rev, a1, a2, b1, b2, _internal_blines=blines
                     )
                 def replacelines(self, rev, a1, a2, b1, b2, _internal_blines=None):
                     """Replace lines [a1, a2) with lines [b1, b2).

                     The edit is applied on top of the last annotate result when one
                     exists, otherwise on top of annotate(rev). New instructions are
                     appended to the program and the instruction at line a1 is
                     overwritten with a jump to them.

                     Args:
                       rev: The revision held responsible for the edit.
                       a1: Index of the first line to replace.
                       a2: Index one past the last line to replace.
                       b1: First line number of the inserted range.
                       b2: Line number one past the end of the inserted range.
                       _internal_blines: Optional sequence indexed by line number in
                         [b1, b2); each entry is unpacked as (rev, linenum) and used
                         in place of (rev, line number) for the inserted line.

                     Raises:
                       LineLogError: If a1 or a2 is beyond the number of annotated
                         lines.
                     """
                     if self._lastannotate:
                         # TODO(augie): make replacelines() accept a revision at
                         # which we're editing as well as a revision to mark
                         # responsible for the edits. In hg-experimental it's
                         # stateful like this, so we're doing the same thing to
                         # retain compatibility with absorb until that's imported.
                         ar = self._lastannotate
                     else:
                         ar = self.annotate(rev)
                         #        ar = self.annotate(self._maxrev)
                     if a1 > len(ar.lines):
                         raise LineLogError(
                             b'%d contains %d lines, tried to access line %d'
                             % (rev, len(ar.lines), a1)
                         )
                     elif a1 == len(ar.lines):
                         # Simulated EOF instruction since we're at EOF, which
                         # doesn't have a "real" line.
                         a1inst = _eof(0, 0)
                         a1info = lineinfo(0, 0, ar._eof)
                     else:
                         a1info = ar.lines[a1]
                         a1inst = self._program[a1info._offset]
                     # Bind the bound methods once; they are called repeatedly below.
                     programlen = self._program.__len__
                     oldproglen = programlen()
                     appendinst = self._program.append
                     # insert
                     blineinfos = []
                     bappend = blineinfos.append
                     if b1 < b2:
                         # Determine the jump target for the JGE at the start of
                         # the new block.
                         tgt = oldproglen + (b2 - b1 + 1)
                         # Jump to skip the insert if we're at an older revision.
                         appendinst(_jl(rev, tgt))
                         for linenum in range(b1, b2):
                             if _internal_blines is None:
                                 # programlen() is the offset the LINE appended on
                                 # the next statement will occupy.
                                 bappend(lineinfo(rev, linenum, programlen()))
                                 appendinst(_line(rev, linenum))
                             else:
                                 newrev, newlinenum = _internal_blines[linenum]
                                 bappend(lineinfo(newrev, newlinenum, programlen()))
                                 appendinst(_line(newrev, newlinenum))
                     # delete
                     if a1 < a2:
                         if a2 > len(ar.lines):
                             raise LineLogError(
                                 b'%d contains %d lines, tried to access line %d'
                                 % (rev, len(ar.lines), a2)
                             )
                         elif a2 == len(ar.lines):
                             endaddr = ar._eof
                         else:
                             endaddr = ar.lines[a2]._offset
                         if a2 > 0 and rev < self._maxrev:
                             # If we're here, we're deleting a chunk of an old
                             # commit, so we need to be careful and not touch
                             # invisible lines between a2-1 and a2 (IOW, lines that
                             # are added later).
                             endaddr = ar.lines[a2 - 1]._offset + 1
                         # At rev and later, skip over the deleted lines.
                         appendinst(_jge(rev, endaddr))
                     # copy instruction from a1
                     a1instpc = programlen()
                     appendinst(a1inst)
                     # if a1inst isn't a jump or EOF, then we need to add an unconditional
                     # jump back into the program here.
                     if not isinstance(a1inst, (_jump, _eof)):
                         appendinst(_jump(0, a1info._offset + 1))
                     # Patch instruction at a1, which makes our patch live.
                     self._program[a1info._offset] = _jump(0, oldproglen)
                     # Update self._lastannotate in place. This serves as a cache to avoid
                     # expensive "self.annotate" in this function, when "replacelines" is
                     # used continuously.
                     if len(self._lastannotate.lines) > a1:
                         # The instruction for line a1 now lives at its copied offset.
                         self._lastannotate.lines[a1]._offset = a1instpc
                     else:
                         assert isinstance(a1inst, _eof)
                         self._lastannotate._eof = a1instpc
                     self._lastannotate.lines[a1:a2] = blineinfos
                     self._lastannotate.rev = max(self._lastannotate.rev, rev)
                     if rev > self._maxrev:
                         self._maxrev = rev
                 def annotate(self, rev):
                     """Execute the linelog program for ``rev`` and collect its lines.

                     Execution starts at offset 1 and runs until an instruction
                     returns None as the next pc. The result is also cached as the
                     "last annotate" state used by the stateful methods of this class.

                     Args:
                       rev: The revision to annotate.

                     Returns:
                       An annotateresult holding rev, the emitted lineinfo objects
                       and the offset of the instruction that ended execution.

                     Raises:
                       LineLogError: If execution has not finished after as many
                         steps as there are instructions in the program.
                     """
                     pc = 1
                     lines = []
                     executed = 0
                     # Sanity check: if instructions executed exceeds len(program), we
                     # hit an infinite loop in the linelog program somehow and we
                     # should stop.
                     while pc is not None and executed < len(self._program):
                         inst = self._program[pc]
                         lastpc = pc
                         pc = inst.execute(rev, pc, lines.append)
                         executed += 1
                     if pc is not None:
                         # Not a raw string: the "\n" must be a real newline so the
                         # program dump starts on its own line.
                         raise LineLogError(
                             'Probably hit an infinite loop in linelog. Program:\n'
                             + self.debugstr()
                         )
                     # lastpc is the offset of the instruction that stopped execution.
                     ar = annotateresult(rev, lines, lastpc)
                     self._lastannotate = ar
                     return ar
                 @property
                 def maxrev(self):
                     return self._maxrev
                 # Stateful methods which depend on the value of the last
                 # annotation run. This API is for compatibility with the original
                 # linelog, and we should probably consider refactoring it.
                 @property
                 def annotateresult(self):
                     """Return the last annotation result. C linelog code exposed this."""
                     return [(l.rev, l.linenum) for l in self._lastannotate.lines]
                 def getoffset(self, line):
                     return self._lastannotate.lines[line]._offset
                 def getalllines(self, start=0, end=0):
                     """Get all lines that ever occurred in [start, end).

                     Passing start == end == 0 means "all lines ever".

                     This works in terms of *internal* program offsets, not line
                     numbers. Conditional jumps are never taken, so lines from every
                     revision are collected as (rev, linenum) pairs.
                     """
                     pc = start or 1
                     lines = []
                     # Allow at most one step per instruction in the program: if
                     # neither EOF nor the stop offset turns up within that budget,
                     # something is badly broken.
                     budget = len(self._program)
                     while budget > 0:
                         budget -= 1
                         inst = self._program[pc]
                         if isinstance(inst, _eof):
                             return lines
                         if isinstance(inst, _jump):
                             nextpc = inst._target
                         else:
                             nextpc = pc + 1
                             if isinstance(inst, _line):
                                 lines.append((inst._rev, inst._origlineno))
                             elif not isinstance(inst, (_jl, _jge)):
                                 raise LineLogError(b"Illegal instruction %r" % inst)
                         if nextpc == end:
                             return lines
                         pc = nextpc
                     raise LineLogError(b"Failed to perform getalllines")