##// END OF EJS Templates
parsers: add a function to efficiently lowercase ASCII strings...
Siddharth Agarwal -
r22778:80f2b63d default
parent child Browse files
Show More
@@ -1,423 +1,432 b''
1 1 # encoding.py - character transcoding support for Mercurial
2 2 #
3 3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 import error
8 import error, parsers
9 9 import unicodedata, locale, os
10 10
11 11 def _getpreferredencoding():
12 12 '''
13 13 On darwin, getpreferredencoding ignores the locale environment and
14 14 always returns mac-roman. http://bugs.python.org/issue6202 fixes this
15 15 for Python 2.7 and up. This is the same corrected code for earlier
16 16 Python versions.
17 17
18 18 However, we can't use a version check for this method, as some distributions
19 19 patch Python to fix this. Instead, we use it as a 'fixer' for the mac-roman
20 20 encoding, as it is unlikely that this encoding is the actually expected.
21 21 '''
22 22 try:
23 23 locale.CODESET
24 24 except AttributeError:
25 25 # Fall back to parsing environment variables :-(
26 26 return locale.getdefaultlocale()[1]
27 27
28 28 oldloc = locale.setlocale(locale.LC_CTYPE)
29 29 locale.setlocale(locale.LC_CTYPE, "")
30 30 result = locale.nl_langinfo(locale.CODESET)
31 31 locale.setlocale(locale.LC_CTYPE, oldloc)
32 32
33 33 return result
34 34
35 35 _encodingfixers = {
36 36 '646': lambda: 'ascii',
37 37 'ANSI_X3.4-1968': lambda: 'ascii',
38 38 'mac-roman': _getpreferredencoding
39 39 }
40 40
41 41 try:
42 42 encoding = os.environ.get("HGENCODING")
43 43 if not encoding:
44 44 encoding = locale.getpreferredencoding() or 'ascii'
45 45 encoding = _encodingfixers.get(encoding, lambda: encoding)()
46 46 except locale.Error:
47 47 encoding = 'ascii'
48 48 encodingmode = os.environ.get("HGENCODINGMODE", "strict")
49 49 fallbackencoding = 'ISO-8859-1'
50 50
51 51 class localstr(str):
52 52 '''This class allows strings that are unmodified to be
53 53 round-tripped to the local encoding and back'''
54 54 def __new__(cls, u, l):
55 55 s = str.__new__(cls, l)
56 56 s._utf8 = u
57 57 return s
58 58 def __hash__(self):
59 59 return hash(self._utf8) # avoid collisions in local string space
60 60
61 61 def tolocal(s):
62 62 """
63 63 Convert a string from internal UTF-8 to local encoding
64 64
65 65 All internal strings should be UTF-8 but some repos before the
66 66 implementation of locale support may contain latin1 or possibly
67 67 other character sets. We attempt to decode everything strictly
68 68 using UTF-8, then Latin-1, and failing that, we use UTF-8 and
69 69 replace unknown characters.
70 70
71 71 The localstr class is used to cache the known UTF-8 encoding of
72 72 strings next to their local representation to allow lossless
73 73 round-trip conversion back to UTF-8.
74 74
75 75 >>> u = 'foo: \\xc3\\xa4' # utf-8
76 76 >>> l = tolocal(u)
77 77 >>> l
78 78 'foo: ?'
79 79 >>> fromlocal(l)
80 80 'foo: \\xc3\\xa4'
81 81 >>> u2 = 'foo: \\xc3\\xa1'
82 82 >>> d = { l: 1, tolocal(u2): 2 }
83 83 >>> len(d) # no collision
84 84 2
85 85 >>> 'foo: ?' in d
86 86 False
87 87 >>> l1 = 'foo: \\xe4' # historical latin1 fallback
88 88 >>> l = tolocal(l1)
89 89 >>> l
90 90 'foo: ?'
91 91 >>> fromlocal(l) # magically in utf-8
92 92 'foo: \\xc3\\xa4'
93 93 """
94 94
95 95 try:
96 96 try:
97 97 # make sure string is actually stored in UTF-8
98 98 u = s.decode('UTF-8')
99 99 if encoding == 'UTF-8':
100 100 # fast path
101 101 return s
102 102 r = u.encode(encoding, "replace")
103 103 if u == r.decode(encoding):
104 104 # r is a safe, non-lossy encoding of s
105 105 return r
106 106 return localstr(s, r)
107 107 except UnicodeDecodeError:
108 108 # we should only get here if we're looking at an ancient changeset
109 109 try:
110 110 u = s.decode(fallbackencoding)
111 111 r = u.encode(encoding, "replace")
112 112 if u == r.decode(encoding):
113 113 # r is a safe, non-lossy encoding of s
114 114 return r
115 115 return localstr(u.encode('UTF-8'), r)
116 116 except UnicodeDecodeError:
117 117 u = s.decode("utf-8", "replace") # last ditch
118 118 return u.encode(encoding, "replace") # can't round-trip
119 119 except LookupError, k:
120 120 raise error.Abort(k, hint="please check your locale settings")
121 121
122 122 def fromlocal(s):
123 123 """
124 124 Convert a string from the local character encoding to UTF-8
125 125
126 126 We attempt to decode strings using the encoding mode set by
127 127 HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
128 128 characters will cause an error message. Other modes include
129 129 'replace', which replaces unknown characters with a special
130 130 Unicode character, and 'ignore', which drops the character.
131 131 """
132 132
133 133 # can we do a lossless round-trip?
134 134 if isinstance(s, localstr):
135 135 return s._utf8
136 136
137 137 try:
138 138 return s.decode(encoding, encodingmode).encode("utf-8")
139 139 except UnicodeDecodeError, inst:
140 140 sub = s[max(0, inst.start - 10):inst.start + 10]
141 141 raise error.Abort("decoding near '%s': %s!" % (sub, inst))
142 142 except LookupError, k:
143 143 raise error.Abort(k, hint="please check your locale settings")
144 144
145 145 # How to treat ambiguous-width characters. Set to 'wide' to treat as wide.
146 146 wide = (os.environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide"
147 147 and "WFA" or "WF")
148 148
149 149 def colwidth(s):
150 150 "Find the column width of a string for display in the local encoding"
151 151 return ucolwidth(s.decode(encoding, 'replace'))
152 152
153 153 def ucolwidth(d):
154 154 "Find the column width of a Unicode string for display"
155 155 eaw = getattr(unicodedata, 'east_asian_width', None)
156 156 if eaw is not None:
157 157 return sum([eaw(c) in wide and 2 or 1 for c in d])
158 158 return len(d)
159 159
160 160 def getcols(s, start, c):
161 161 '''Use colwidth to find a c-column substring of s starting at byte
162 162 index start'''
163 163 for x in xrange(start + c, len(s)):
164 164 t = s[start:x]
165 165 if colwidth(t) == c:
166 166 return t
167 167
168 168 def trim(s, width, ellipsis='', leftside=False):
169 169 """Trim string 's' to at most 'width' columns (including 'ellipsis').
170 170
171 171 If 'leftside' is True, left side of string 's' is trimmed.
172 172 'ellipsis' is always placed at trimmed side.
173 173
174 174 >>> ellipsis = '+++'
175 175 >>> from mercurial import encoding
176 176 >>> encoding.encoding = 'utf-8'
177 177 >>> t= '1234567890'
178 178 >>> print trim(t, 12, ellipsis=ellipsis)
179 179 1234567890
180 180 >>> print trim(t, 10, ellipsis=ellipsis)
181 181 1234567890
182 182 >>> print trim(t, 8, ellipsis=ellipsis)
183 183 12345+++
184 184 >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
185 185 +++67890
186 186 >>> print trim(t, 8)
187 187 12345678
188 188 >>> print trim(t, 8, leftside=True)
189 189 34567890
190 190 >>> print trim(t, 3, ellipsis=ellipsis)
191 191 +++
192 192 >>> print trim(t, 1, ellipsis=ellipsis)
193 193 +
194 194 >>> u = u'\u3042\u3044\u3046\u3048\u304a' # 2 x 5 = 10 columns
195 195 >>> t = u.encode(encoding.encoding)
196 196 >>> print trim(t, 12, ellipsis=ellipsis)
197 197 \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a
198 198 >>> print trim(t, 10, ellipsis=ellipsis)
199 199 \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a
200 200 >>> print trim(t, 8, ellipsis=ellipsis)
201 201 \xe3\x81\x82\xe3\x81\x84+++
202 202 >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
203 203 +++\xe3\x81\x88\xe3\x81\x8a
204 204 >>> print trim(t, 5)
205 205 \xe3\x81\x82\xe3\x81\x84
206 206 >>> print trim(t, 5, leftside=True)
207 207 \xe3\x81\x88\xe3\x81\x8a
208 208 >>> print trim(t, 4, ellipsis=ellipsis)
209 209 +++
210 210 >>> print trim(t, 4, ellipsis=ellipsis, leftside=True)
211 211 +++
212 212 >>> t = '\x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa' # invalid byte sequence
213 213 >>> print trim(t, 12, ellipsis=ellipsis)
214 214 \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
215 215 >>> print trim(t, 10, ellipsis=ellipsis)
216 216 \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
217 217 >>> print trim(t, 8, ellipsis=ellipsis)
218 218 \x11\x22\x33\x44\x55+++
219 219 >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
220 220 +++\x66\x77\x88\x99\xaa
221 221 >>> print trim(t, 8)
222 222 \x11\x22\x33\x44\x55\x66\x77\x88
223 223 >>> print trim(t, 8, leftside=True)
224 224 \x33\x44\x55\x66\x77\x88\x99\xaa
225 225 >>> print trim(t, 3, ellipsis=ellipsis)
226 226 +++
227 227 >>> print trim(t, 1, ellipsis=ellipsis)
228 228 +
229 229 """
230 230 try:
231 231 u = s.decode(encoding)
232 232 except UnicodeDecodeError:
233 233 if len(s) <= width: # trimming is not needed
234 234 return s
235 235 width -= len(ellipsis)
236 236 if width <= 0: # no enough room even for ellipsis
237 237 return ellipsis[:width + len(ellipsis)]
238 238 if leftside:
239 239 return ellipsis + s[-width:]
240 240 return s[:width] + ellipsis
241 241
242 242 if ucolwidth(u) <= width: # trimming is not needed
243 243 return s
244 244
245 245 width -= len(ellipsis)
246 246 if width <= 0: # no enough room even for ellipsis
247 247 return ellipsis[:width + len(ellipsis)]
248 248
249 249 if leftside:
250 250 uslice = lambda i: u[i:]
251 251 concat = lambda s: ellipsis + s
252 252 else:
253 253 uslice = lambda i: u[:-i]
254 254 concat = lambda s: s + ellipsis
255 255 for i in xrange(1, len(u)):
256 256 usub = uslice(i)
257 257 if ucolwidth(usub) <= width:
258 258 return concat(usub.encode(encoding))
259 259 return ellipsis # no enough room for multi-column characters
260 260
261 def asciilower(s):
262 '''convert a string to lowercase if ASCII
263
264 Raises UnicodeDecodeError if non-ASCII characters are found.'''
265 s.decode('ascii')
266 return s.lower()
267
268 asciilower = getattr(parsers, 'asciilower', asciilower)
269
261 270 def lower(s):
262 271 "best-effort encoding-aware case-folding of local string s"
263 272 try:
264 273 s.decode('ascii') # throw exception for non-ASCII character
265 274 return s.lower()
266 275 except UnicodeDecodeError:
267 276 pass
268 277 try:
269 278 if isinstance(s, localstr):
270 279 u = s._utf8.decode("utf-8")
271 280 else:
272 281 u = s.decode(encoding, encodingmode)
273 282
274 283 lu = u.lower()
275 284 if u == lu:
276 285 return s # preserve localstring
277 286 return lu.encode(encoding)
278 287 except UnicodeError:
279 288 return s.lower() # we don't know how to fold this except in ASCII
280 289 except LookupError, k:
281 290 raise error.Abort(k, hint="please check your locale settings")
282 291
283 292 def upper(s):
284 293 "best-effort encoding-aware case-folding of local string s"
285 294 try:
286 295 s.decode('ascii') # throw exception for non-ASCII character
287 296 return s.upper()
288 297 except UnicodeDecodeError:
289 298 pass
290 299 try:
291 300 if isinstance(s, localstr):
292 301 u = s._utf8.decode("utf-8")
293 302 else:
294 303 u = s.decode(encoding, encodingmode)
295 304
296 305 uu = u.upper()
297 306 if u == uu:
298 307 return s # preserve localstring
299 308 return uu.encode(encoding)
300 309 except UnicodeError:
301 310 return s.upper() # we don't know how to fold this except in ASCII
302 311 except LookupError, k:
303 312 raise error.Abort(k, hint="please check your locale settings")
304 313
305 314 _jsonmap = {}
306 315
307 316 def jsonescape(s):
308 317 '''returns a string suitable for JSON
309 318
310 319 JSON is problematic for us because it doesn't support non-Unicode
311 320 bytes. To deal with this, we take the following approach:
312 321
313 322 - localstr objects are converted back to UTF-8
314 323 - valid UTF-8/ASCII strings are passed as-is
315 324 - other strings are converted to UTF-8b surrogate encoding
316 325 - apply JSON-specified string escaping
317 326
318 327 (escapes are doubled in these tests)
319 328
320 329 >>> jsonescape('this is a test')
321 330 'this is a test'
322 331 >>> jsonescape('escape characters: \\0 \\x0b \\t \\n \\r \\" \\\\')
323 332 'escape characters: \\\\u0000 \\\\u000b \\\\t \\\\n \\\\r \\\\" \\\\\\\\'
324 333 >>> jsonescape('a weird byte: \\xdd')
325 334 'a weird byte: \\xed\\xb3\\x9d'
326 335 >>> jsonescape('utf-8: caf\\xc3\\xa9')
327 336 'utf-8: caf\\xc3\\xa9'
328 337 >>> jsonescape('')
329 338 ''
330 339 '''
331 340
332 341 if not _jsonmap:
333 342 for x in xrange(32):
334 343 _jsonmap[chr(x)] = "\u%04x" %x
335 344 for x in xrange(32, 256):
336 345 c = chr(x)
337 346 _jsonmap[c] = c
338 347 _jsonmap['\t'] = '\\t'
339 348 _jsonmap['\n'] = '\\n'
340 349 _jsonmap['\"'] = '\\"'
341 350 _jsonmap['\\'] = '\\\\'
342 351 _jsonmap['\b'] = '\\b'
343 352 _jsonmap['\f'] = '\\f'
344 353 _jsonmap['\r'] = '\\r'
345 354
346 355 return ''.join(_jsonmap[c] for c in toutf8b(s))
347 356
348 357 def toutf8b(s):
349 358 '''convert a local, possibly-binary string into UTF-8b
350 359
351 360 This is intended as a generic method to preserve data when working
352 361 with schemes like JSON and XML that have no provision for
353 362 arbitrary byte strings. As Mercurial often doesn't know
354 363 what encoding data is in, we use so-called UTF-8b.
355 364
356 365 If a string is already valid UTF-8 (or ASCII), it passes unmodified.
357 366 Otherwise, unsupported bytes are mapped to UTF-16 surrogate range,
358 367 uDC00-uDCFF.
359 368
360 369 Principles of operation:
361 370
362 371 - ASCII and UTF-8 data successfully round-trips and is understood
363 372 by Unicode-oriented clients
364 373 - filenames and file contents in arbitrary other encodings can have
365 374 be round-tripped or recovered by clueful clients
366 375 - local strings that have a cached known UTF-8 encoding (aka
367 376 localstr) get sent as UTF-8 so Unicode-oriented clients get the
368 377 Unicode data they want
369 378 - because we must preserve UTF-8 bytestring in places such as
370 379 filenames, metadata can't be roundtripped without help
371 380
372 381 (Note: "UTF-8b" often refers to decoding a mix of valid UTF-8 and
373 382 arbitrary bytes into an internal Unicode format that can be
374 383 re-encoded back into the original. Here we are exposing the
375 384 internal surrogate encoding as a UTF-8 string.)
376 385 '''
377 386
378 387 if isinstance(s, localstr):
379 388 return s._utf8
380 389
381 390 try:
382 391 s.decode('utf-8')
383 392 return s
384 393 except UnicodeDecodeError:
385 394 # surrogate-encode any characters that don't round-trip
386 395 s2 = s.decode('utf-8', 'ignore').encode('utf-8')
387 396 r = ""
388 397 pos = 0
389 398 for c in s:
390 399 if s2[pos:pos + 1] == c:
391 400 r += c
392 401 pos += 1
393 402 else:
394 403 r += unichr(0xdc00 + ord(c)).encode('utf-8')
395 404 return r
396 405
397 406 def fromutf8b(s):
398 407 '''Given a UTF-8b string, return a local, possibly-binary string.
399 408
400 409 return the original binary string. This
401 410 is a round-trip process for strings like filenames, but metadata
402 411 that's was passed through tolocal will remain in UTF-8.
403 412
404 413 >>> m = "\\xc3\\xa9\\x99abcd"
405 414 >>> n = toutf8b(m)
406 415 >>> n
407 416 '\\xc3\\xa9\\xed\\xb2\\x99abcd'
408 417 >>> fromutf8b(n) == m
409 418 True
410 419 '''
411 420
412 421 # fast path - look for uDxxx prefixes in s
413 422 if "\xed" not in s:
414 423 return s
415 424
416 425 u = s.decode("utf-8")
417 426 r = ""
418 427 for c in u:
419 428 if ord(c) & 0xff00 == 0xdc00:
420 429 r += chr(ord(c) & 0xff)
421 430 else:
422 431 r += c.encode("utf-8")
423 432 return r
@@ -1,2253 +1,2308 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <stddef.h>
13 13 #include <string.h>
14 14
15 15 #include "util.h"
16 16
17 17 static char *versionerrortext = "Python minor version mismatch";
18 18
19 19 static int8_t hextable[256] = {
20 20 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
21 21 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
22 22 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
23 23 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */
24 24 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
25 25 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
26 26 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
27 27 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
28 28 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
29 29 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
30 30 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
31 31 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
32 32 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
33 33 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
34 34 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
35 35 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
36 36 };
37 37
38 static char lowertable[128] = {
39 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
40 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
41 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
42 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
43 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
44 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
45 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
46 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
47 '\x40',
48 '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */
49 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */
50 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */
51 '\x78', '\x79', '\x7a', /* X-Z */
52 '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
53 '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
54 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
55 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
56 '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
57 };
58
38 59 static inline int hexdigit(const char *p, Py_ssize_t off)
39 60 {
40 61 int8_t val = hextable[(unsigned char)p[off]];
41 62
42 63 if (val >= 0) {
43 64 return val;
44 65 }
45 66
46 67 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
47 68 return 0;
48 69 }
49 70
50 71 /*
51 72 * Turn a hex-encoded string into binary.
52 73 */
53 74 static PyObject *unhexlify(const char *str, int len)
54 75 {
55 76 PyObject *ret;
56 77 char *d;
57 78 int i;
58 79
59 80 ret = PyBytes_FromStringAndSize(NULL, len / 2);
60 81
61 82 if (!ret)
62 83 return NULL;
63 84
64 85 d = PyBytes_AsString(ret);
65 86
66 87 for (i = 0; i < len;) {
67 88 int hi = hexdigit(str, i++);
68 89 int lo = hexdigit(str, i++);
69 90 *d++ = (hi << 4) | lo;
70 91 }
71 92
72 93 return ret;
73 94 }
74 95
96 static PyObject *asciilower(PyObject *self, PyObject *args)
97 {
98 char *str, *newstr;
99 int i, len;
100 PyObject *newobj = NULL;
101
102 if (!PyArg_ParseTuple(args, "s#", &str, &len))
103 goto quit;
104
105 newobj = PyBytes_FromStringAndSize(NULL, len);
106 if (!newobj)
107 goto quit;
108
109 newstr = PyBytes_AS_STRING(newobj);
110
111 for (i = 0; i < len; i++) {
112 char c = str[i];
113 if (c & 0x80) {
114 PyObject *err = PyUnicodeDecodeError_Create(
115 "ascii", str, len, i, (i + 1),
116 "unexpected code byte");
117 PyErr_SetObject(PyExc_UnicodeDecodeError, err);
118 goto quit;
119 }
120 newstr[i] = lowertable[(unsigned char)c];
121 }
122
123 return newobj;
124 quit:
125 Py_XDECREF(newobj);
126 return NULL;
127 }
128
75 129 /*
76 130 * This code assumes that a manifest is stitched together with newline
77 131 * ('\n') characters.
78 132 */
79 133 static PyObject *parse_manifest(PyObject *self, PyObject *args)
80 134 {
81 135 PyObject *mfdict, *fdict;
82 136 char *str, *start, *end;
83 137 int len;
84 138
85 139 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
86 140 &PyDict_Type, &mfdict,
87 141 &PyDict_Type, &fdict,
88 142 &str, &len))
89 143 goto quit;
90 144
91 145 start = str;
92 146 end = str + len;
93 147 while (start < end) {
94 148 PyObject *file = NULL, *node = NULL;
95 149 PyObject *flags = NULL;
96 150 char *zero = NULL, *newline = NULL;
97 151 ptrdiff_t nlen;
98 152
99 153 zero = memchr(start, '\0', end - start);
100 154 if (!zero) {
101 155 PyErr_SetString(PyExc_ValueError,
102 156 "manifest entry has no separator");
103 157 goto quit;
104 158 }
105 159
106 160 newline = memchr(zero + 1, '\n', end - (zero + 1));
107 161 if (!newline) {
108 162 PyErr_SetString(PyExc_ValueError,
109 163 "manifest contains trailing garbage");
110 164 goto quit;
111 165 }
112 166
113 167 file = PyBytes_FromStringAndSize(start, zero - start);
114 168
115 169 if (!file)
116 170 goto bail;
117 171
118 172 nlen = newline - zero - 1;
119 173
120 174 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
121 175 if (!node)
122 176 goto bail;
123 177
124 178 if (nlen > 40) {
125 179 flags = PyBytes_FromStringAndSize(zero + 41,
126 180 nlen - 40);
127 181 if (!flags)
128 182 goto bail;
129 183
130 184 if (PyDict_SetItem(fdict, file, flags) == -1)
131 185 goto bail;
132 186 }
133 187
134 188 if (PyDict_SetItem(mfdict, file, node) == -1)
135 189 goto bail;
136 190
137 191 start = newline + 1;
138 192
139 193 Py_XDECREF(flags);
140 194 Py_XDECREF(node);
141 195 Py_XDECREF(file);
142 196 continue;
143 197 bail:
144 198 Py_XDECREF(flags);
145 199 Py_XDECREF(node);
146 200 Py_XDECREF(file);
147 201 goto quit;
148 202 }
149 203
150 204 Py_INCREF(Py_None);
151 205 return Py_None;
152 206 quit:
153 207 return NULL;
154 208 }
155 209
156 210 static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
157 211 int size, int mtime)
158 212 {
159 213 dirstateTupleObject *t = PyObject_New(dirstateTupleObject,
160 214 &dirstateTupleType);
161 215 if (!t)
162 216 return NULL;
163 217 t->state = state;
164 218 t->mode = mode;
165 219 t->size = size;
166 220 t->mtime = mtime;
167 221 return t;
168 222 }
169 223
170 224 static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
171 225 PyObject *kwds)
172 226 {
173 227 /* We do all the initialization here and not a tp_init function because
174 228 * dirstate_tuple is immutable. */
175 229 dirstateTupleObject *t;
176 230 char state;
177 231 int size, mode, mtime;
178 232 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
179 233 return NULL;
180 234
181 235 t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
182 236 if (!t)
183 237 return NULL;
184 238 t->state = state;
185 239 t->mode = mode;
186 240 t->size = size;
187 241 t->mtime = mtime;
188 242
189 243 return (PyObject *)t;
190 244 }
191 245
192 246 static void dirstate_tuple_dealloc(PyObject *o)
193 247 {
194 248 PyObject_Del(o);
195 249 }
196 250
197 251 static Py_ssize_t dirstate_tuple_length(PyObject *o)
198 252 {
199 253 return 4;
200 254 }
201 255
202 256 static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
203 257 {
204 258 dirstateTupleObject *t = (dirstateTupleObject *)o;
205 259 switch (i) {
206 260 case 0:
207 261 return PyBytes_FromStringAndSize(&t->state, 1);
208 262 case 1:
209 263 return PyInt_FromLong(t->mode);
210 264 case 2:
211 265 return PyInt_FromLong(t->size);
212 266 case 3:
213 267 return PyInt_FromLong(t->mtime);
214 268 default:
215 269 PyErr_SetString(PyExc_IndexError, "index out of range");
216 270 return NULL;
217 271 }
218 272 }
219 273
220 274 static PySequenceMethods dirstate_tuple_sq = {
221 275 dirstate_tuple_length, /* sq_length */
222 276 0, /* sq_concat */
223 277 0, /* sq_repeat */
224 278 dirstate_tuple_item, /* sq_item */
225 279 0, /* sq_ass_item */
226 280 0, /* sq_contains */
227 281 0, /* sq_inplace_concat */
228 282 0 /* sq_inplace_repeat */
229 283 };
230 284
231 285 PyTypeObject dirstateTupleType = {
232 286 PyVarObject_HEAD_INIT(NULL, 0)
233 287 "dirstate_tuple", /* tp_name */
234 288 sizeof(dirstateTupleObject),/* tp_basicsize */
235 289 0, /* tp_itemsize */
236 290 (destructor)dirstate_tuple_dealloc, /* tp_dealloc */
237 291 0, /* tp_print */
238 292 0, /* tp_getattr */
239 293 0, /* tp_setattr */
240 294 0, /* tp_compare */
241 295 0, /* tp_repr */
242 296 0, /* tp_as_number */
243 297 &dirstate_tuple_sq, /* tp_as_sequence */
244 298 0, /* tp_as_mapping */
245 299 0, /* tp_hash */
246 300 0, /* tp_call */
247 301 0, /* tp_str */
248 302 0, /* tp_getattro */
249 303 0, /* tp_setattro */
250 304 0, /* tp_as_buffer */
251 305 Py_TPFLAGS_DEFAULT, /* tp_flags */
252 306 "dirstate tuple", /* tp_doc */
253 307 0, /* tp_traverse */
254 308 0, /* tp_clear */
255 309 0, /* tp_richcompare */
256 310 0, /* tp_weaklistoffset */
257 311 0, /* tp_iter */
258 312 0, /* tp_iternext */
259 313 0, /* tp_methods */
260 314 0, /* tp_members */
261 315 0, /* tp_getset */
262 316 0, /* tp_base */
263 317 0, /* tp_dict */
264 318 0, /* tp_descr_get */
265 319 0, /* tp_descr_set */
266 320 0, /* tp_dictoffset */
267 321 0, /* tp_init */
268 322 0, /* tp_alloc */
269 323 dirstate_tuple_new, /* tp_new */
270 324 };
271 325
272 326 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
273 327 {
274 328 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
275 329 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
276 330 char state, *cur, *str, *cpos;
277 331 int mode, size, mtime;
278 332 unsigned int flen, len, pos = 40;
279 333 int readlen;
280 334
281 335 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
282 336 &PyDict_Type, &dmap,
283 337 &PyDict_Type, &cmap,
284 338 &str, &readlen))
285 339 goto quit;
286 340
287 341 if (readlen < 0)
288 342 goto quit;
289 343
290 344 len = readlen;
291 345
292 346 /* read parents */
293 347 if (len < 40)
294 348 goto quit;
295 349
296 350 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
297 351 if (!parents)
298 352 goto quit;
299 353
300 354 /* read filenames */
301 355 while (pos >= 40 && pos < len) {
302 356 cur = str + pos;
303 357 /* unpack header */
304 358 state = *cur;
305 359 mode = getbe32(cur + 1);
306 360 size = getbe32(cur + 5);
307 361 mtime = getbe32(cur + 9);
308 362 flen = getbe32(cur + 13);
309 363 pos += 17;
310 364 cur += 17;
311 365 if (flen > len - pos) {
312 366 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
313 367 goto quit;
314 368 }
315 369
316 370 entry = (PyObject *)make_dirstate_tuple(state, mode, size,
317 371 mtime);
318 372 cpos = memchr(cur, 0, flen);
319 373 if (cpos) {
320 374 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
321 375 cname = PyBytes_FromStringAndSize(cpos + 1,
322 376 flen - (cpos - cur) - 1);
323 377 if (!fname || !cname ||
324 378 PyDict_SetItem(cmap, fname, cname) == -1 ||
325 379 PyDict_SetItem(dmap, fname, entry) == -1)
326 380 goto quit;
327 381 Py_DECREF(cname);
328 382 } else {
329 383 fname = PyBytes_FromStringAndSize(cur, flen);
330 384 if (!fname ||
331 385 PyDict_SetItem(dmap, fname, entry) == -1)
332 386 goto quit;
333 387 }
334 388 Py_DECREF(fname);
335 389 Py_DECREF(entry);
336 390 fname = cname = entry = NULL;
337 391 pos += flen;
338 392 }
339 393
340 394 ret = parents;
341 395 Py_INCREF(ret);
342 396 quit:
343 397 Py_XDECREF(fname);
344 398 Py_XDECREF(cname);
345 399 Py_XDECREF(entry);
346 400 Py_XDECREF(parents);
347 401 return ret;
348 402 }
349 403
350 404 /*
351 405 * Efficiently pack a dirstate object into its on-disk format.
352 406 */
353 407 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
354 408 {
355 409 PyObject *packobj = NULL;
356 410 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
357 411 Py_ssize_t nbytes, pos, l;
358 412 PyObject *k, *v, *pn;
359 413 char *p, *s;
360 414 double now;
361 415
362 416 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
363 417 &PyDict_Type, &map, &PyDict_Type, &copymap,
364 418 &pl, &now))
365 419 return NULL;
366 420
367 421 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
368 422 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
369 423 return NULL;
370 424 }
371 425
372 426 /* Figure out how much we need to allocate. */
373 427 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
374 428 PyObject *c;
375 429 if (!PyString_Check(k)) {
376 430 PyErr_SetString(PyExc_TypeError, "expected string key");
377 431 goto bail;
378 432 }
379 433 nbytes += PyString_GET_SIZE(k) + 17;
380 434 c = PyDict_GetItem(copymap, k);
381 435 if (c) {
382 436 if (!PyString_Check(c)) {
383 437 PyErr_SetString(PyExc_TypeError,
384 438 "expected string key");
385 439 goto bail;
386 440 }
387 441 nbytes += PyString_GET_SIZE(c) + 1;
388 442 }
389 443 }
390 444
391 445 packobj = PyString_FromStringAndSize(NULL, nbytes);
392 446 if (packobj == NULL)
393 447 goto bail;
394 448
395 449 p = PyString_AS_STRING(packobj);
396 450
397 451 pn = PySequence_ITEM(pl, 0);
398 452 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
399 453 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
400 454 goto bail;
401 455 }
402 456 memcpy(p, s, l);
403 457 p += 20;
404 458 pn = PySequence_ITEM(pl, 1);
405 459 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
406 460 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
407 461 goto bail;
408 462 }
409 463 memcpy(p, s, l);
410 464 p += 20;
411 465
412 466 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
413 467 dirstateTupleObject *tuple;
414 468 char state;
415 469 uint32_t mode, size, mtime;
416 470 Py_ssize_t len, l;
417 471 PyObject *o;
418 472 char *t;
419 473
420 474 if (!dirstate_tuple_check(v)) {
421 475 PyErr_SetString(PyExc_TypeError,
422 476 "expected a dirstate tuple");
423 477 goto bail;
424 478 }
425 479 tuple = (dirstateTupleObject *)v;
426 480
427 481 state = tuple->state;
428 482 mode = tuple->mode;
429 483 size = tuple->size;
430 484 mtime = tuple->mtime;
431 485 if (state == 'n' && mtime == (uint32_t)now) {
432 486 /* See pure/parsers.py:pack_dirstate for why we do
433 487 * this. */
434 488 mtime = -1;
435 489 mtime_unset = (PyObject *)make_dirstate_tuple(
436 490 state, mode, size, mtime);
437 491 if (!mtime_unset)
438 492 goto bail;
439 493 if (PyDict_SetItem(map, k, mtime_unset) == -1)
440 494 goto bail;
441 495 Py_DECREF(mtime_unset);
442 496 mtime_unset = NULL;
443 497 }
444 498 *p++ = state;
445 499 putbe32(mode, p);
446 500 putbe32(size, p + 4);
447 501 putbe32(mtime, p + 8);
448 502 t = p + 12;
449 503 p += 16;
450 504 len = PyString_GET_SIZE(k);
451 505 memcpy(p, PyString_AS_STRING(k), len);
452 506 p += len;
453 507 o = PyDict_GetItem(copymap, k);
454 508 if (o) {
455 509 *p++ = '\0';
456 510 l = PyString_GET_SIZE(o);
457 511 memcpy(p, PyString_AS_STRING(o), l);
458 512 p += l;
459 513 len += l + 1;
460 514 }
461 515 putbe32((uint32_t)len, t);
462 516 }
463 517
464 518 pos = p - PyString_AS_STRING(packobj);
465 519 if (pos != nbytes) {
466 520 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
467 521 (long)pos, (long)nbytes);
468 522 goto bail;
469 523 }
470 524
471 525 return packobj;
472 526 bail:
473 527 Py_XDECREF(mtime_unset);
474 528 Py_XDECREF(packobj);
475 529 return NULL;
476 530 }
477 531
478 532 /*
479 533 * A base-16 trie for fast node->rev mapping.
480 534 *
481 535 * Positive value is index of the next node in the trie
482 536 * Negative value is a leaf: -(rev + 1)
483 537 * Zero is empty
484 538 */
485 539 typedef struct {
486 540 int children[16];
487 541 } nodetree;
488 542
489 543 /*
490 544 * This class has two behaviours.
491 545 *
492 546 * When used in a list-like way (with integer keys), we decode an
493 547 * entry in a RevlogNG index file on demand. Our last entry is a
494 548 * sentinel, always a nullid. We have limited support for
495 549 * integer-keyed insert and delete, only at elements right before the
496 550 * sentinel.
497 551 *
498 552 * With string keys, we lazily perform a reverse mapping from node to
499 553 * rev, using a base-16 trie.
500 554 */
501 555 typedef struct {
502 556 PyObject_HEAD
503 557 /* Type-specific fields go here. */
504 558 PyObject *data; /* raw bytes of index */
505 559 PyObject **cache; /* cached tuples */
506 560 const char **offsets; /* populated on demand */
507 561 Py_ssize_t raw_length; /* original number of elements */
508 562 Py_ssize_t length; /* current number of elements */
509 563 PyObject *added; /* populated on demand */
510 564 PyObject *headrevs; /* cache, invalidated on changes */
511 565 PyObject *filteredrevs;/* filtered revs set */
512 566 nodetree *nt; /* base-16 trie */
513 567 int ntlength; /* # nodes in use */
514 568 int ntcapacity; /* # nodes allocated */
515 569 int ntdepth; /* maximum depth of tree */
516 570 int ntsplits; /* # splits performed */
517 571 int ntrev; /* last rev scanned */
518 572 int ntlookups; /* # lookups */
519 573 int ntmisses; /* # lookups that miss the cache */
520 574 int inlined;
521 575 } indexObject;
522 576
523 577 static Py_ssize_t index_length(const indexObject *self)
524 578 {
525 579 if (self->added == NULL)
526 580 return self->length;
527 581 return self->length + PyList_GET_SIZE(self->added);
528 582 }
529 583
530 584 static PyObject *nullentry;
531 585 static const char nullid[20];
532 586
533 587 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
534 588
535 589 #if LONG_MAX == 0x7fffffffL
536 590 static char *tuple_format = "Kiiiiiis#";
537 591 #else
538 592 static char *tuple_format = "kiiiiiis#";
539 593 #endif
540 594
541 595 /* A RevlogNG v1 index entry is 64 bytes long. */
542 596 static const long v1_hdrsize = 64;
543 597
544 598 /*
545 599 * Return a pointer to the beginning of a RevlogNG record.
546 600 */
547 601 static const char *index_deref(indexObject *self, Py_ssize_t pos)
548 602 {
549 603 if (self->inlined && pos > 0) {
550 604 if (self->offsets == NULL) {
551 605 self->offsets = malloc(self->raw_length *
552 606 sizeof(*self->offsets));
553 607 if (self->offsets == NULL)
554 608 return (const char *)PyErr_NoMemory();
555 609 inline_scan(self, self->offsets);
556 610 }
557 611 return self->offsets[pos];
558 612 }
559 613
560 614 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
561 615 }
562 616
563 617 /*
564 618 * RevlogNG format (all in big endian, data may be inlined):
565 619 * 6 bytes: offset
566 620 * 2 bytes: flags
567 621 * 4 bytes: compressed length
568 622 * 4 bytes: uncompressed length
569 623 * 4 bytes: base revision
570 624 * 4 bytes: link revision
571 625 * 4 bytes: parent 1 revision
572 626 * 4 bytes: parent 2 revision
573 627 * 32 bytes: nodeid (only 20 bytes used)
574 628 */
575 629 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
576 630 {
577 631 uint64_t offset_flags;
578 632 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
579 633 const char *c_node_id;
580 634 const char *data;
581 635 Py_ssize_t length = index_length(self);
582 636 PyObject *entry;
583 637
584 638 if (pos < 0)
585 639 pos += length;
586 640
587 641 if (pos < 0 || pos >= length) {
588 642 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
589 643 return NULL;
590 644 }
591 645
592 646 if (pos == length - 1) {
593 647 Py_INCREF(nullentry);
594 648 return nullentry;
595 649 }
596 650
597 651 if (pos >= self->length - 1) {
598 652 PyObject *obj;
599 653 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
600 654 Py_INCREF(obj);
601 655 return obj;
602 656 }
603 657
604 658 if (self->cache) {
605 659 if (self->cache[pos]) {
606 660 Py_INCREF(self->cache[pos]);
607 661 return self->cache[pos];
608 662 }
609 663 } else {
610 664 self->cache = calloc(self->raw_length, sizeof(PyObject *));
611 665 if (self->cache == NULL)
612 666 return PyErr_NoMemory();
613 667 }
614 668
615 669 data = index_deref(self, pos);
616 670 if (data == NULL)
617 671 return NULL;
618 672
619 673 offset_flags = getbe32(data + 4);
620 674 if (pos == 0) /* mask out version number for the first entry */
621 675 offset_flags &= 0xFFFF;
622 676 else {
623 677 uint32_t offset_high = getbe32(data);
624 678 offset_flags |= ((uint64_t)offset_high) << 32;
625 679 }
626 680
627 681 comp_len = getbe32(data + 8);
628 682 uncomp_len = getbe32(data + 12);
629 683 base_rev = getbe32(data + 16);
630 684 link_rev = getbe32(data + 20);
631 685 parent_1 = getbe32(data + 24);
632 686 parent_2 = getbe32(data + 28);
633 687 c_node_id = data + 32;
634 688
635 689 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
636 690 uncomp_len, base_rev, link_rev,
637 691 parent_1, parent_2, c_node_id, 20);
638 692
639 693 if (entry) {
640 694 PyObject_GC_UnTrack(entry);
641 695 Py_INCREF(entry);
642 696 }
643 697
644 698 self->cache[pos] = entry;
645 699
646 700 return entry;
647 701 }
648 702
649 703 /*
650 704 * Return the 20-byte SHA of the node corresponding to the given rev.
651 705 */
652 706 static const char *index_node(indexObject *self, Py_ssize_t pos)
653 707 {
654 708 Py_ssize_t length = index_length(self);
655 709 const char *data;
656 710
657 711 if (pos == length - 1 || pos == INT_MAX)
658 712 return nullid;
659 713
660 714 if (pos >= length)
661 715 return NULL;
662 716
663 717 if (pos >= self->length - 1) {
664 718 PyObject *tuple, *str;
665 719 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
666 720 str = PyTuple_GetItem(tuple, 7);
667 721 return str ? PyString_AS_STRING(str) : NULL;
668 722 }
669 723
670 724 data = index_deref(self, pos);
671 725 return data ? data + 32 : NULL;
672 726 }
673 727
674 728 static int nt_insert(indexObject *self, const char *node, int rev);
675 729
676 730 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
677 731 {
678 732 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
679 733 return -1;
680 734 if (*nodelen == 20)
681 735 return 0;
682 736 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
683 737 return -1;
684 738 }
685 739
686 740 static PyObject *index_insert(indexObject *self, PyObject *args)
687 741 {
688 742 PyObject *obj;
689 743 char *node;
690 744 int index;
691 745 Py_ssize_t len, nodelen;
692 746
693 747 if (!PyArg_ParseTuple(args, "iO", &index, &obj))
694 748 return NULL;
695 749
696 750 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
697 751 PyErr_SetString(PyExc_TypeError, "8-tuple required");
698 752 return NULL;
699 753 }
700 754
701 755 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
702 756 return NULL;
703 757
704 758 len = index_length(self);
705 759
706 760 if (index < 0)
707 761 index += len;
708 762
709 763 if (index != len - 1) {
710 764 PyErr_SetString(PyExc_IndexError,
711 765 "insert only supported at index -1");
712 766 return NULL;
713 767 }
714 768
715 769 if (self->added == NULL) {
716 770 self->added = PyList_New(0);
717 771 if (self->added == NULL)
718 772 return NULL;
719 773 }
720 774
721 775 if (PyList_Append(self->added, obj) == -1)
722 776 return NULL;
723 777
724 778 if (self->nt)
725 779 nt_insert(self, node, index);
726 780
727 781 Py_CLEAR(self->headrevs);
728 782 Py_RETURN_NONE;
729 783 }
730 784
731 785 static void _index_clearcaches(indexObject *self)
732 786 {
733 787 if (self->cache) {
734 788 Py_ssize_t i;
735 789
736 790 for (i = 0; i < self->raw_length; i++)
737 791 Py_CLEAR(self->cache[i]);
738 792 free(self->cache);
739 793 self->cache = NULL;
740 794 }
741 795 if (self->offsets) {
742 796 free(self->offsets);
743 797 self->offsets = NULL;
744 798 }
745 799 if (self->nt) {
746 800 free(self->nt);
747 801 self->nt = NULL;
748 802 }
749 803 Py_CLEAR(self->headrevs);
750 804 }
751 805
752 806 static PyObject *index_clearcaches(indexObject *self)
753 807 {
754 808 _index_clearcaches(self);
755 809 self->ntlength = self->ntcapacity = 0;
756 810 self->ntdepth = self->ntsplits = 0;
757 811 self->ntrev = -1;
758 812 self->ntlookups = self->ntmisses = 0;
759 813 Py_RETURN_NONE;
760 814 }
761 815
762 816 static PyObject *index_stats(indexObject *self)
763 817 {
764 818 PyObject *obj = PyDict_New();
765 819
766 820 if (obj == NULL)
767 821 return NULL;
768 822
769 823 #define istat(__n, __d) \
770 824 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
771 825 goto bail;
772 826
773 827 if (self->added) {
774 828 Py_ssize_t len = PyList_GET_SIZE(self->added);
775 829 if (PyDict_SetItemString(obj, "index entries added",
776 830 PyInt_FromSsize_t(len)) == -1)
777 831 goto bail;
778 832 }
779 833
780 834 if (self->raw_length != self->length - 1)
781 835 istat(raw_length, "revs on disk");
782 836 istat(length, "revs in memory");
783 837 istat(ntcapacity, "node trie capacity");
784 838 istat(ntdepth, "node trie depth");
785 839 istat(ntlength, "node trie count");
786 840 istat(ntlookups, "node trie lookups");
787 841 istat(ntmisses, "node trie misses");
788 842 istat(ntrev, "node trie last rev scanned");
789 843 istat(ntsplits, "node trie splits");
790 844
791 845 #undef istat
792 846
793 847 return obj;
794 848
795 849 bail:
796 850 Py_XDECREF(obj);
797 851 return NULL;
798 852 }
799 853
800 854 /*
801 855 * When we cache a list, we want to be sure the caller can't mutate
802 856 * the cached copy.
803 857 */
804 858 static PyObject *list_copy(PyObject *list)
805 859 {
806 860 Py_ssize_t len = PyList_GET_SIZE(list);
807 861 PyObject *newlist = PyList_New(len);
808 862 Py_ssize_t i;
809 863
810 864 if (newlist == NULL)
811 865 return NULL;
812 866
813 867 for (i = 0; i < len; i++) {
814 868 PyObject *obj = PyList_GET_ITEM(list, i);
815 869 Py_INCREF(obj);
816 870 PyList_SET_ITEM(newlist, i, obj);
817 871 }
818 872
819 873 return newlist;
820 874 }
821 875
822 876 static int check_filter(PyObject *filter, Py_ssize_t arg) {
823 877 if (filter) {
824 878 PyObject *arglist, *result;
825 879 int isfiltered;
826 880
827 881 arglist = Py_BuildValue("(n)", arg);
828 882 if (!arglist) {
829 883 return -1;
830 884 }
831 885
832 886 result = PyEval_CallObject(filter, arglist);
833 887 Py_DECREF(arglist);
834 888 if (!result) {
835 889 return -1;
836 890 }
837 891
838 892 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
839 893 * same as this function, so we can just return it directly.*/
840 894 isfiltered = PyObject_IsTrue(result);
841 895 Py_DECREF(result);
842 896 return isfiltered;
843 897 } else {
844 898 return 0;
845 899 }
846 900 }
847 901
848 902 static PyObject *index_headrevs(indexObject *self, PyObject *args)
849 903 {
850 904 Py_ssize_t i, len, addlen;
851 905 char *nothead = NULL;
852 906 PyObject *heads = NULL;
853 907 PyObject *filter = NULL;
854 908 PyObject *filteredrevs = Py_None;
855 909
856 910 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
857 911 return NULL;
858 912 }
859 913
860 914 if (self->headrevs && filteredrevs == self->filteredrevs)
861 915 return list_copy(self->headrevs);
862 916
863 917 Py_DECREF(self->filteredrevs);
864 918 self->filteredrevs = filteredrevs;
865 919 Py_INCREF(filteredrevs);
866 920
867 921 if (filteredrevs != Py_None) {
868 922 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
869 923 if (!filter) {
870 924 PyErr_SetString(PyExc_TypeError,
871 925 "filteredrevs has no attribute __contains__");
872 926 goto bail;
873 927 }
874 928 }
875 929
876 930 len = index_length(self) - 1;
877 931 heads = PyList_New(0);
878 932 if (heads == NULL)
879 933 goto bail;
880 934 if (len == 0) {
881 935 PyObject *nullid = PyInt_FromLong(-1);
882 936 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
883 937 Py_XDECREF(nullid);
884 938 goto bail;
885 939 }
886 940 goto done;
887 941 }
888 942
889 943 nothead = calloc(len, 1);
890 944 if (nothead == NULL)
891 945 goto bail;
892 946
893 947 for (i = 0; i < self->raw_length; i++) {
894 948 const char *data;
895 949 int parent_1, parent_2, isfiltered;
896 950
897 951 isfiltered = check_filter(filter, i);
898 952 if (isfiltered == -1) {
899 953 PyErr_SetString(PyExc_TypeError,
900 954 "unable to check filter");
901 955 goto bail;
902 956 }
903 957
904 958 if (isfiltered) {
905 959 nothead[i] = 1;
906 960 continue;
907 961 }
908 962
909 963 data = index_deref(self, i);
910 964 parent_1 = getbe32(data + 24);
911 965 parent_2 = getbe32(data + 28);
912 966
913 967 if (parent_1 >= 0)
914 968 nothead[parent_1] = 1;
915 969 if (parent_2 >= 0)
916 970 nothead[parent_2] = 1;
917 971 }
918 972
919 973 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
920 974
921 975 for (i = 0; i < addlen; i++) {
922 976 PyObject *rev = PyList_GET_ITEM(self->added, i);
923 977 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
924 978 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
925 979 long parent_1, parent_2;
926 980 int isfiltered;
927 981
928 982 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
929 983 PyErr_SetString(PyExc_TypeError,
930 984 "revlog parents are invalid");
931 985 goto bail;
932 986 }
933 987
934 988 isfiltered = check_filter(filter, i);
935 989 if (isfiltered == -1) {
936 990 PyErr_SetString(PyExc_TypeError,
937 991 "unable to check filter");
938 992 goto bail;
939 993 }
940 994
941 995 if (isfiltered) {
942 996 nothead[i] = 1;
943 997 continue;
944 998 }
945 999
946 1000 parent_1 = PyInt_AS_LONG(p1);
947 1001 parent_2 = PyInt_AS_LONG(p2);
948 1002 if (parent_1 >= 0)
949 1003 nothead[parent_1] = 1;
950 1004 if (parent_2 >= 0)
951 1005 nothead[parent_2] = 1;
952 1006 }
953 1007
954 1008 for (i = 0; i < len; i++) {
955 1009 PyObject *head;
956 1010
957 1011 if (nothead[i])
958 1012 continue;
959 1013 head = PyInt_FromSsize_t(i);
960 1014 if (head == NULL || PyList_Append(heads, head) == -1) {
961 1015 Py_XDECREF(head);
962 1016 goto bail;
963 1017 }
964 1018 }
965 1019
966 1020 done:
967 1021 self->headrevs = heads;
968 1022 Py_XDECREF(filter);
969 1023 free(nothead);
970 1024 return list_copy(self->headrevs);
971 1025 bail:
972 1026 Py_XDECREF(filter);
973 1027 Py_XDECREF(heads);
974 1028 free(nothead);
975 1029 return NULL;
976 1030 }
977 1031
978 1032 static inline int nt_level(const char *node, Py_ssize_t level)
979 1033 {
980 1034 int v = node[level>>1];
981 1035 if (!(level & 1))
982 1036 v >>= 4;
983 1037 return v & 0xf;
984 1038 }
985 1039
986 1040 /*
987 1041 * Return values:
988 1042 *
989 1043 * -4: match is ambiguous (multiple candidates)
990 1044 * -2: not found
991 1045 * rest: valid rev
992 1046 */
993 1047 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
994 1048 int hex)
995 1049 {
996 1050 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
997 1051 int level, maxlevel, off;
998 1052
999 1053 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
1000 1054 return -1;
1001 1055
1002 1056 if (self->nt == NULL)
1003 1057 return -2;
1004 1058
1005 1059 if (hex)
1006 1060 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
1007 1061 else
1008 1062 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
1009 1063
1010 1064 for (level = off = 0; level < maxlevel; level++) {
1011 1065 int k = getnybble(node, level);
1012 1066 nodetree *n = &self->nt[off];
1013 1067 int v = n->children[k];
1014 1068
1015 1069 if (v < 0) {
1016 1070 const char *n;
1017 1071 Py_ssize_t i;
1018 1072
1019 1073 v = -v - 1;
1020 1074 n = index_node(self, v);
1021 1075 if (n == NULL)
1022 1076 return -2;
1023 1077 for (i = level; i < maxlevel; i++)
1024 1078 if (getnybble(node, i) != nt_level(n, i))
1025 1079 return -2;
1026 1080 return v;
1027 1081 }
1028 1082 if (v == 0)
1029 1083 return -2;
1030 1084 off = v;
1031 1085 }
1032 1086 /* multiple matches against an ambiguous prefix */
1033 1087 return -4;
1034 1088 }
1035 1089
1036 1090 static int nt_new(indexObject *self)
1037 1091 {
1038 1092 if (self->ntlength == self->ntcapacity) {
1039 1093 self->ntcapacity *= 2;
1040 1094 self->nt = realloc(self->nt,
1041 1095 self->ntcapacity * sizeof(nodetree));
1042 1096 if (self->nt == NULL) {
1043 1097 PyErr_SetString(PyExc_MemoryError, "out of memory");
1044 1098 return -1;
1045 1099 }
1046 1100 memset(&self->nt[self->ntlength], 0,
1047 1101 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
1048 1102 }
1049 1103 return self->ntlength++;
1050 1104 }
1051 1105
1052 1106 static int nt_insert(indexObject *self, const char *node, int rev)
1053 1107 {
1054 1108 int level = 0;
1055 1109 int off = 0;
1056 1110
1057 1111 while (level < 40) {
1058 1112 int k = nt_level(node, level);
1059 1113 nodetree *n;
1060 1114 int v;
1061 1115
1062 1116 n = &self->nt[off];
1063 1117 v = n->children[k];
1064 1118
1065 1119 if (v == 0) {
1066 1120 n->children[k] = -rev - 1;
1067 1121 return 0;
1068 1122 }
1069 1123 if (v < 0) {
1070 1124 const char *oldnode = index_node(self, -v - 1);
1071 1125 int noff;
1072 1126
1073 1127 if (!oldnode || !memcmp(oldnode, node, 20)) {
1074 1128 n->children[k] = -rev - 1;
1075 1129 return 0;
1076 1130 }
1077 1131 noff = nt_new(self);
1078 1132 if (noff == -1)
1079 1133 return -1;
1080 1134 /* self->nt may have been changed by realloc */
1081 1135 self->nt[off].children[k] = noff;
1082 1136 off = noff;
1083 1137 n = &self->nt[off];
1084 1138 n->children[nt_level(oldnode, ++level)] = v;
1085 1139 if (level > self->ntdepth)
1086 1140 self->ntdepth = level;
1087 1141 self->ntsplits += 1;
1088 1142 } else {
1089 1143 level += 1;
1090 1144 off = v;
1091 1145 }
1092 1146 }
1093 1147
1094 1148 return -1;
1095 1149 }
1096 1150
1097 1151 static int nt_init(indexObject *self)
1098 1152 {
1099 1153 if (self->nt == NULL) {
1100 1154 if (self->raw_length > INT_MAX) {
1101 1155 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
1102 1156 return -1;
1103 1157 }
1104 1158 self->ntcapacity = self->raw_length < 4
1105 1159 ? 4 : (int)self->raw_length / 2;
1106 1160
1107 1161 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
1108 1162 if (self->nt == NULL) {
1109 1163 PyErr_NoMemory();
1110 1164 return -1;
1111 1165 }
1112 1166 self->ntlength = 1;
1113 1167 self->ntrev = (int)index_length(self) - 1;
1114 1168 self->ntlookups = 1;
1115 1169 self->ntmisses = 0;
1116 1170 if (nt_insert(self, nullid, INT_MAX) == -1)
1117 1171 return -1;
1118 1172 }
1119 1173 return 0;
1120 1174 }
1121 1175
1122 1176 /*
1123 1177 * Return values:
1124 1178 *
1125 1179 * -3: error (exception set)
1126 1180 * -2: not found (no exception set)
1127 1181 * rest: valid rev
1128 1182 */
1129 1183 static int index_find_node(indexObject *self,
1130 1184 const char *node, Py_ssize_t nodelen)
1131 1185 {
1132 1186 int rev;
1133 1187
1134 1188 self->ntlookups++;
1135 1189 rev = nt_find(self, node, nodelen, 0);
1136 1190 if (rev >= -1)
1137 1191 return rev;
1138 1192
1139 1193 if (nt_init(self) == -1)
1140 1194 return -3;
1141 1195
1142 1196 /*
1143 1197 * For the first handful of lookups, we scan the entire index,
1144 1198 * and cache only the matching nodes. This optimizes for cases
1145 1199 * like "hg tip", where only a few nodes are accessed.
1146 1200 *
1147 1201 * After that, we cache every node we visit, using a single
1148 1202 * scan amortized over multiple lookups. This gives the best
1149 1203 * bulk performance, e.g. for "hg log".
1150 1204 */
1151 1205 if (self->ntmisses++ < 4) {
1152 1206 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1153 1207 const char *n = index_node(self, rev);
1154 1208 if (n == NULL)
1155 1209 return -2;
1156 1210 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1157 1211 if (nt_insert(self, n, rev) == -1)
1158 1212 return -3;
1159 1213 break;
1160 1214 }
1161 1215 }
1162 1216 } else {
1163 1217 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1164 1218 const char *n = index_node(self, rev);
1165 1219 if (n == NULL) {
1166 1220 self->ntrev = rev + 1;
1167 1221 return -2;
1168 1222 }
1169 1223 if (nt_insert(self, n, rev) == -1) {
1170 1224 self->ntrev = rev + 1;
1171 1225 return -3;
1172 1226 }
1173 1227 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1174 1228 break;
1175 1229 }
1176 1230 }
1177 1231 self->ntrev = rev;
1178 1232 }
1179 1233
1180 1234 if (rev >= 0)
1181 1235 return rev;
1182 1236 return -2;
1183 1237 }
1184 1238
1185 1239 static PyObject *raise_revlog_error(void)
1186 1240 {
1187 1241 static PyObject *errclass;
1188 1242 PyObject *mod = NULL, *errobj;
1189 1243
1190 1244 if (errclass == NULL) {
1191 1245 PyObject *dict;
1192 1246
1193 1247 mod = PyImport_ImportModule("mercurial.error");
1194 1248 if (mod == NULL)
1195 1249 goto classfail;
1196 1250
1197 1251 dict = PyModule_GetDict(mod);
1198 1252 if (dict == NULL)
1199 1253 goto classfail;
1200 1254
1201 1255 errclass = PyDict_GetItemString(dict, "RevlogError");
1202 1256 if (errclass == NULL) {
1203 1257 PyErr_SetString(PyExc_SystemError,
1204 1258 "could not find RevlogError");
1205 1259 goto classfail;
1206 1260 }
1207 1261 Py_INCREF(errclass);
1208 1262 }
1209 1263
1210 1264 errobj = PyObject_CallFunction(errclass, NULL);
1211 1265 if (errobj == NULL)
1212 1266 return NULL;
1213 1267 PyErr_SetObject(errclass, errobj);
1214 1268 return errobj;
1215 1269
1216 1270 classfail:
1217 1271 Py_XDECREF(mod);
1218 1272 return NULL;
1219 1273 }
1220 1274
1221 1275 static PyObject *index_getitem(indexObject *self, PyObject *value)
1222 1276 {
1223 1277 char *node;
1224 1278 Py_ssize_t nodelen;
1225 1279 int rev;
1226 1280
1227 1281 if (PyInt_Check(value))
1228 1282 return index_get(self, PyInt_AS_LONG(value));
1229 1283
1230 1284 if (node_check(value, &node, &nodelen) == -1)
1231 1285 return NULL;
1232 1286 rev = index_find_node(self, node, nodelen);
1233 1287 if (rev >= -1)
1234 1288 return PyInt_FromLong(rev);
1235 1289 if (rev == -2)
1236 1290 raise_revlog_error();
1237 1291 return NULL;
1238 1292 }
1239 1293
1240 1294 static int nt_partialmatch(indexObject *self, const char *node,
1241 1295 Py_ssize_t nodelen)
1242 1296 {
1243 1297 int rev;
1244 1298
1245 1299 if (nt_init(self) == -1)
1246 1300 return -3;
1247 1301
1248 1302 if (self->ntrev > 0) {
1249 1303 /* ensure that the radix tree is fully populated */
1250 1304 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1251 1305 const char *n = index_node(self, rev);
1252 1306 if (n == NULL)
1253 1307 return -2;
1254 1308 if (nt_insert(self, n, rev) == -1)
1255 1309 return -3;
1256 1310 }
1257 1311 self->ntrev = rev;
1258 1312 }
1259 1313
1260 1314 return nt_find(self, node, nodelen, 1);
1261 1315 }
1262 1316
1263 1317 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1264 1318 {
1265 1319 const char *fullnode;
1266 1320 int nodelen;
1267 1321 char *node;
1268 1322 int rev, i;
1269 1323
1270 1324 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1271 1325 return NULL;
1272 1326
1273 1327 if (nodelen < 4) {
1274 1328 PyErr_SetString(PyExc_ValueError, "key too short");
1275 1329 return NULL;
1276 1330 }
1277 1331
1278 1332 if (nodelen > 40) {
1279 1333 PyErr_SetString(PyExc_ValueError, "key too long");
1280 1334 return NULL;
1281 1335 }
1282 1336
1283 1337 for (i = 0; i < nodelen; i++)
1284 1338 hexdigit(node, i);
1285 1339 if (PyErr_Occurred()) {
1286 1340 /* input contains non-hex characters */
1287 1341 PyErr_Clear();
1288 1342 Py_RETURN_NONE;
1289 1343 }
1290 1344
1291 1345 rev = nt_partialmatch(self, node, nodelen);
1292 1346
1293 1347 switch (rev) {
1294 1348 case -4:
1295 1349 raise_revlog_error();
1296 1350 case -3:
1297 1351 return NULL;
1298 1352 case -2:
1299 1353 Py_RETURN_NONE;
1300 1354 case -1:
1301 1355 return PyString_FromStringAndSize(nullid, 20);
1302 1356 }
1303 1357
1304 1358 fullnode = index_node(self, rev);
1305 1359 if (fullnode == NULL) {
1306 1360 PyErr_Format(PyExc_IndexError,
1307 1361 "could not access rev %d", rev);
1308 1362 return NULL;
1309 1363 }
1310 1364 return PyString_FromStringAndSize(fullnode, 20);
1311 1365 }
1312 1366
1313 1367 static PyObject *index_m_get(indexObject *self, PyObject *args)
1314 1368 {
1315 1369 Py_ssize_t nodelen;
1316 1370 PyObject *val;
1317 1371 char *node;
1318 1372 int rev;
1319 1373
1320 1374 if (!PyArg_ParseTuple(args, "O", &val))
1321 1375 return NULL;
1322 1376 if (node_check(val, &node, &nodelen) == -1)
1323 1377 return NULL;
1324 1378 rev = index_find_node(self, node, nodelen);
1325 1379 if (rev == -3)
1326 1380 return NULL;
1327 1381 if (rev == -2)
1328 1382 Py_RETURN_NONE;
1329 1383 return PyInt_FromLong(rev);
1330 1384 }
1331 1385
1332 1386 static int index_contains(indexObject *self, PyObject *value)
1333 1387 {
1334 1388 char *node;
1335 1389 Py_ssize_t nodelen;
1336 1390
1337 1391 if (PyInt_Check(value)) {
1338 1392 long rev = PyInt_AS_LONG(value);
1339 1393 return rev >= -1 && rev < index_length(self);
1340 1394 }
1341 1395
1342 1396 if (node_check(value, &node, &nodelen) == -1)
1343 1397 return -1;
1344 1398
1345 1399 switch (index_find_node(self, node, nodelen)) {
1346 1400 case -3:
1347 1401 return -1;
1348 1402 case -2:
1349 1403 return 0;
1350 1404 default:
1351 1405 return 1;
1352 1406 }
1353 1407 }
1354 1408
1355 1409 static inline void index_get_parents(indexObject *self, int rev, int *ps)
1356 1410 {
1357 1411 if (rev >= self->length - 1) {
1358 1412 PyObject *tuple = PyList_GET_ITEM(self->added,
1359 1413 rev - self->length + 1);
1360 1414 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
1361 1415 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
1362 1416 } else {
1363 1417 const char *data = index_deref(self, rev);
1364 1418 ps[0] = getbe32(data + 24);
1365 1419 ps[1] = getbe32(data + 28);
1366 1420 }
1367 1421 }
1368 1422
1369 1423 typedef uint64_t bitmask;
1370 1424
1371 1425 /*
1372 1426 * Given a disjoint set of revs, return all candidates for the
1373 1427 * greatest common ancestor. In revset notation, this is the set
1374 1428 * "heads(::a and ::b and ...)"
1375 1429 */
1376 1430 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1377 1431 int revcount)
1378 1432 {
1379 1433 const bitmask allseen = (1ull << revcount) - 1;
1380 1434 const bitmask poison = 1ull << revcount;
1381 1435 PyObject *gca = PyList_New(0);
1382 1436 int i, v, interesting;
1383 1437 int maxrev = -1;
1384 1438 bitmask sp;
1385 1439 bitmask *seen;
1386 1440
1387 1441 if (gca == NULL)
1388 1442 return PyErr_NoMemory();
1389 1443
1390 1444 for (i = 0; i < revcount; i++) {
1391 1445 if (revs[i] > maxrev)
1392 1446 maxrev = revs[i];
1393 1447 }
1394 1448
1395 1449 seen = calloc(sizeof(*seen), maxrev + 1);
1396 1450 if (seen == NULL) {
1397 1451 Py_DECREF(gca);
1398 1452 return PyErr_NoMemory();
1399 1453 }
1400 1454
1401 1455 for (i = 0; i < revcount; i++)
1402 1456 seen[revs[i]] = 1ull << i;
1403 1457
1404 1458 interesting = revcount;
1405 1459
1406 1460 for (v = maxrev; v >= 0 && interesting; v--) {
1407 1461 bitmask sv = seen[v];
1408 1462 int parents[2];
1409 1463
1410 1464 if (!sv)
1411 1465 continue;
1412 1466
1413 1467 if (sv < poison) {
1414 1468 interesting -= 1;
1415 1469 if (sv == allseen) {
1416 1470 PyObject *obj = PyInt_FromLong(v);
1417 1471 if (obj == NULL)
1418 1472 goto bail;
1419 1473 if (PyList_Append(gca, obj) == -1) {
1420 1474 Py_DECREF(obj);
1421 1475 goto bail;
1422 1476 }
1423 1477 sv |= poison;
1424 1478 for (i = 0; i < revcount; i++) {
1425 1479 if (revs[i] == v)
1426 1480 goto done;
1427 1481 }
1428 1482 }
1429 1483 }
1430 1484 index_get_parents(self, v, parents);
1431 1485
1432 1486 for (i = 0; i < 2; i++) {
1433 1487 int p = parents[i];
1434 1488 if (p == -1)
1435 1489 continue;
1436 1490 sp = seen[p];
1437 1491 if (sv < poison) {
1438 1492 if (sp == 0) {
1439 1493 seen[p] = sv;
1440 1494 interesting++;
1441 1495 }
1442 1496 else if (sp != sv)
1443 1497 seen[p] |= sv;
1444 1498 } else {
1445 1499 if (sp && sp < poison)
1446 1500 interesting--;
1447 1501 seen[p] = sv;
1448 1502 }
1449 1503 }
1450 1504 }
1451 1505
1452 1506 done:
1453 1507 free(seen);
1454 1508 return gca;
1455 1509 bail:
1456 1510 free(seen);
1457 1511 Py_XDECREF(gca);
1458 1512 return NULL;
1459 1513 }
1460 1514
1461 1515 /*
1462 1516 * Given a disjoint set of revs, return the subset with the longest
1463 1517 * path to the root.
1464 1518 */
1465 1519 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1466 1520 {
1467 1521 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1468 1522 static const Py_ssize_t capacity = 24;
1469 1523 int *depth, *interesting = NULL;
1470 1524 int i, j, v, ninteresting;
1471 1525 PyObject *dict = NULL, *keys = NULL;
1472 1526 long *seen = NULL;
1473 1527 int maxrev = -1;
1474 1528 long final;
1475 1529
1476 1530 if (revcount > capacity) {
1477 1531 PyErr_Format(PyExc_OverflowError,
1478 1532 "bitset size (%ld) > capacity (%ld)",
1479 1533 (long)revcount, (long)capacity);
1480 1534 return NULL;
1481 1535 }
1482 1536
1483 1537 for (i = 0; i < revcount; i++) {
1484 1538 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1485 1539 if (n > maxrev)
1486 1540 maxrev = n;
1487 1541 }
1488 1542
1489 1543 depth = calloc(sizeof(*depth), maxrev + 1);
1490 1544 if (depth == NULL)
1491 1545 return PyErr_NoMemory();
1492 1546
1493 1547 seen = calloc(sizeof(*seen), maxrev + 1);
1494 1548 if (seen == NULL) {
1495 1549 PyErr_NoMemory();
1496 1550 goto bail;
1497 1551 }
1498 1552
1499 1553 interesting = calloc(sizeof(*interesting), 2 << revcount);
1500 1554 if (interesting == NULL) {
1501 1555 PyErr_NoMemory();
1502 1556 goto bail;
1503 1557 }
1504 1558
1505 1559 if (PyList_Sort(revs) == -1)
1506 1560 goto bail;
1507 1561
1508 1562 for (i = 0; i < revcount; i++) {
1509 1563 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1510 1564 long b = 1l << i;
1511 1565 depth[n] = 1;
1512 1566 seen[n] = b;
1513 1567 interesting[b] = 1;
1514 1568 }
1515 1569
1516 1570 ninteresting = (int)revcount;
1517 1571
1518 1572 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1519 1573 int dv = depth[v];
1520 1574 int parents[2];
1521 1575 long sv;
1522 1576
1523 1577 if (dv == 0)
1524 1578 continue;
1525 1579
1526 1580 sv = seen[v];
1527 1581 index_get_parents(self, v, parents);
1528 1582
1529 1583 for (i = 0; i < 2; i++) {
1530 1584 int p = parents[i];
1531 1585 long nsp, sp;
1532 1586 int dp;
1533 1587
1534 1588 if (p == -1)
1535 1589 continue;
1536 1590
1537 1591 dp = depth[p];
1538 1592 nsp = sp = seen[p];
1539 1593 if (dp <= dv) {
1540 1594 depth[p] = dv + 1;
1541 1595 if (sp != sv) {
1542 1596 interesting[sv] += 1;
1543 1597 nsp = seen[p] = sv;
1544 1598 if (sp) {
1545 1599 interesting[sp] -= 1;
1546 1600 if (interesting[sp] == 0)
1547 1601 ninteresting -= 1;
1548 1602 }
1549 1603 }
1550 1604 }
1551 1605 else if (dv == dp - 1) {
1552 1606 nsp = sp | sv;
1553 1607 if (nsp == sp)
1554 1608 continue;
1555 1609 seen[p] = nsp;
1556 1610 interesting[sp] -= 1;
1557 1611 if (interesting[sp] == 0 && interesting[nsp] > 0)
1558 1612 ninteresting -= 1;
1559 1613 interesting[nsp] += 1;
1560 1614 }
1561 1615 }
1562 1616 interesting[sv] -= 1;
1563 1617 if (interesting[sv] == 0)
1564 1618 ninteresting -= 1;
1565 1619 }
1566 1620
1567 1621 final = 0;
1568 1622 j = ninteresting;
1569 1623 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1570 1624 if (interesting[i] == 0)
1571 1625 continue;
1572 1626 final |= i;
1573 1627 j -= 1;
1574 1628 }
1575 1629 if (final == 0) {
1576 1630 keys = PyList_New(0);
1577 1631 goto bail;
1578 1632 }
1579 1633
1580 1634 dict = PyDict_New();
1581 1635 if (dict == NULL)
1582 1636 goto bail;
1583 1637
1584 1638 for (i = 0; i < revcount; i++) {
1585 1639 PyObject *key;
1586 1640
1587 1641 if ((final & (1 << i)) == 0)
1588 1642 continue;
1589 1643
1590 1644 key = PyList_GET_ITEM(revs, i);
1591 1645 Py_INCREF(key);
1592 1646 Py_INCREF(Py_None);
1593 1647 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1594 1648 Py_DECREF(key);
1595 1649 Py_DECREF(Py_None);
1596 1650 goto bail;
1597 1651 }
1598 1652 }
1599 1653
1600 1654 keys = PyDict_Keys(dict);
1601 1655
1602 1656 bail:
1603 1657 free(depth);
1604 1658 free(seen);
1605 1659 free(interesting);
1606 1660 Py_XDECREF(dict);
1607 1661
1608 1662 return keys;
1609 1663 }
1610 1664
1611 1665 /*
1612 1666 * Given a (possibly overlapping) set of revs, return the greatest
1613 1667 * common ancestors: those with the longest path to the root.
1614 1668 */
1615 1669 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1616 1670 {
1617 1671 PyObject *ret = NULL, *gca = NULL;
1618 1672 Py_ssize_t argcount, i, len;
1619 1673 bitmask repeat = 0;
1620 1674 int revcount = 0;
1621 1675 int *revs;
1622 1676
1623 1677 argcount = PySequence_Length(args);
1624 1678 revs = malloc(argcount * sizeof(*revs));
1625 1679 if (argcount > 0 && revs == NULL)
1626 1680 return PyErr_NoMemory();
1627 1681 len = index_length(self) - 1;
1628 1682
1629 1683 for (i = 0; i < argcount; i++) {
1630 1684 static const int capacity = 24;
1631 1685 PyObject *obj = PySequence_GetItem(args, i);
1632 1686 bitmask x;
1633 1687 long val;
1634 1688
1635 1689 if (!PyInt_Check(obj)) {
1636 1690 PyErr_SetString(PyExc_TypeError,
1637 1691 "arguments must all be ints");
1638 1692 goto bail;
1639 1693 }
1640 1694 val = PyInt_AsLong(obj);
1641 1695 if (val == -1) {
1642 1696 ret = PyList_New(0);
1643 1697 goto done;
1644 1698 }
1645 1699 if (val < 0 || val >= len) {
1646 1700 PyErr_SetString(PyExc_IndexError,
1647 1701 "index out of range");
1648 1702 goto bail;
1649 1703 }
1650 1704 /* this cheesy bloom filter lets us avoid some more
1651 1705 * expensive duplicate checks in the common set-is-disjoint
1652 1706 * case */
1653 1707 x = 1ull << (val & 0x3f);
1654 1708 if (repeat & x) {
1655 1709 int k;
1656 1710 for (k = 0; k < revcount; k++) {
1657 1711 if (val == revs[k])
1658 1712 goto duplicate;
1659 1713 }
1660 1714 }
1661 1715 else repeat |= x;
1662 1716 if (revcount >= capacity) {
1663 1717 PyErr_Format(PyExc_OverflowError,
1664 1718 "bitset size (%d) > capacity (%d)",
1665 1719 revcount, capacity);
1666 1720 goto bail;
1667 1721 }
1668 1722 revs[revcount++] = (int)val;
1669 1723 duplicate:;
1670 1724 }
1671 1725
1672 1726 if (revcount == 0) {
1673 1727 ret = PyList_New(0);
1674 1728 goto done;
1675 1729 }
1676 1730 if (revcount == 1) {
1677 1731 PyObject *obj;
1678 1732 ret = PyList_New(1);
1679 1733 if (ret == NULL)
1680 1734 goto bail;
1681 1735 obj = PyInt_FromLong(revs[0]);
1682 1736 if (obj == NULL)
1683 1737 goto bail;
1684 1738 PyList_SET_ITEM(ret, 0, obj);
1685 1739 goto done;
1686 1740 }
1687 1741
1688 1742 gca = find_gca_candidates(self, revs, revcount);
1689 1743 if (gca == NULL)
1690 1744 goto bail;
1691 1745
1692 1746 if (PyList_GET_SIZE(gca) <= 1) {
1693 1747 ret = gca;
1694 1748 Py_INCREF(gca);
1695 1749 }
1696 1750 else ret = find_deepest(self, gca);
1697 1751
1698 1752 done:
1699 1753 free(revs);
1700 1754 Py_XDECREF(gca);
1701 1755
1702 1756 return ret;
1703 1757
1704 1758 bail:
1705 1759 free(revs);
1706 1760 Py_XDECREF(gca);
1707 1761 Py_XDECREF(ret);
1708 1762 return NULL;
1709 1763 }
1710 1764
1711 1765 /*
1712 1766 * Given a (possibly overlapping) set of revs, return all the
1713 1767 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
1714 1768 */
1715 1769 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
1716 1770 {
1717 1771 PyObject *ret = NULL;
1718 1772 Py_ssize_t argcount, i, len;
1719 1773 bitmask repeat = 0;
1720 1774 int revcount = 0;
1721 1775 int *revs;
1722 1776
1723 1777 argcount = PySequence_Length(args);
1724 1778 revs = malloc(argcount * sizeof(*revs));
1725 1779 if (argcount > 0 && revs == NULL)
1726 1780 return PyErr_NoMemory();
1727 1781 len = index_length(self) - 1;
1728 1782
1729 1783 for (i = 0; i < argcount; i++) {
1730 1784 static const int capacity = 24;
1731 1785 PyObject *obj = PySequence_GetItem(args, i);
1732 1786 bitmask x;
1733 1787 long val;
1734 1788
1735 1789 if (!PyInt_Check(obj)) {
1736 1790 PyErr_SetString(PyExc_TypeError,
1737 1791 "arguments must all be ints");
1738 1792 goto bail;
1739 1793 }
1740 1794 val = PyInt_AsLong(obj);
1741 1795 if (val == -1) {
1742 1796 ret = PyList_New(0);
1743 1797 goto done;
1744 1798 }
1745 1799 if (val < 0 || val >= len) {
1746 1800 PyErr_SetString(PyExc_IndexError,
1747 1801 "index out of range");
1748 1802 goto bail;
1749 1803 }
1750 1804 /* this cheesy bloom filter lets us avoid some more
1751 1805 * expensive duplicate checks in the common set-is-disjoint
1752 1806 * case */
1753 1807 x = 1ull << (val & 0x3f);
1754 1808 if (repeat & x) {
1755 1809 int k;
1756 1810 for (k = 0; k < revcount; k++) {
1757 1811 if (val == revs[k])
1758 1812 goto duplicate;
1759 1813 }
1760 1814 }
1761 1815 else repeat |= x;
1762 1816 if (revcount >= capacity) {
1763 1817 PyErr_Format(PyExc_OverflowError,
1764 1818 "bitset size (%d) > capacity (%d)",
1765 1819 revcount, capacity);
1766 1820 goto bail;
1767 1821 }
1768 1822 revs[revcount++] = (int)val;
1769 1823 duplicate:;
1770 1824 }
1771 1825
1772 1826 if (revcount == 0) {
1773 1827 ret = PyList_New(0);
1774 1828 goto done;
1775 1829 }
1776 1830 if (revcount == 1) {
1777 1831 PyObject *obj;
1778 1832 ret = PyList_New(1);
1779 1833 if (ret == NULL)
1780 1834 goto bail;
1781 1835 obj = PyInt_FromLong(revs[0]);
1782 1836 if (obj == NULL)
1783 1837 goto bail;
1784 1838 PyList_SET_ITEM(ret, 0, obj);
1785 1839 goto done;
1786 1840 }
1787 1841
1788 1842 ret = find_gca_candidates(self, revs, revcount);
1789 1843 if (ret == NULL)
1790 1844 goto bail;
1791 1845
1792 1846 done:
1793 1847 free(revs);
1794 1848 return ret;
1795 1849
1796 1850 bail:
1797 1851 free(revs);
1798 1852 Py_XDECREF(ret);
1799 1853 return NULL;
1800 1854 }
1801 1855
1802 1856 /*
1803 1857 * Invalidate any trie entries introduced by added revs.
1804 1858 */
1805 1859 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1806 1860 {
1807 1861 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1808 1862
1809 1863 for (i = start; i < len; i++) {
1810 1864 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1811 1865 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1812 1866
1813 1867 nt_insert(self, PyString_AS_STRING(node), -1);
1814 1868 }
1815 1869
1816 1870 if (start == 0)
1817 1871 Py_CLEAR(self->added);
1818 1872 }
1819 1873
1820 1874 /*
1821 1875 * Delete a numeric range of revs, which must be at the end of the
1822 1876 * range, but exclude the sentinel nullid entry.
1823 1877 */
1824 1878 static int index_slice_del(indexObject *self, PyObject *item)
1825 1879 {
1826 1880 Py_ssize_t start, stop, step, slicelength;
1827 1881 Py_ssize_t length = index_length(self);
1828 1882 int ret = 0;
1829 1883
1830 1884 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1831 1885 &start, &stop, &step, &slicelength) < 0)
1832 1886 return -1;
1833 1887
1834 1888 if (slicelength <= 0)
1835 1889 return 0;
1836 1890
1837 1891 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1838 1892 stop = start;
1839 1893
1840 1894 if (step < 0) {
1841 1895 stop = start + 1;
1842 1896 start = stop + step*(slicelength - 1) - 1;
1843 1897 step = -step;
1844 1898 }
1845 1899
1846 1900 if (step != 1) {
1847 1901 PyErr_SetString(PyExc_ValueError,
1848 1902 "revlog index delete requires step size of 1");
1849 1903 return -1;
1850 1904 }
1851 1905
1852 1906 if (stop != length - 1) {
1853 1907 PyErr_SetString(PyExc_IndexError,
1854 1908 "revlog index deletion indices are invalid");
1855 1909 return -1;
1856 1910 }
1857 1911
1858 1912 if (start < self->length - 1) {
1859 1913 if (self->nt) {
1860 1914 Py_ssize_t i;
1861 1915
1862 1916 for (i = start + 1; i < self->length - 1; i++) {
1863 1917 const char *node = index_node(self, i);
1864 1918
1865 1919 if (node)
1866 1920 nt_insert(self, node, -1);
1867 1921 }
1868 1922 if (self->added)
1869 1923 nt_invalidate_added(self, 0);
1870 1924 if (self->ntrev > start)
1871 1925 self->ntrev = (int)start;
1872 1926 }
1873 1927 self->length = start + 1;
1874 1928 if (start < self->raw_length) {
1875 1929 if (self->cache) {
1876 1930 Py_ssize_t i;
1877 1931 for (i = start; i < self->raw_length; i++)
1878 1932 Py_CLEAR(self->cache[i]);
1879 1933 }
1880 1934 self->raw_length = start;
1881 1935 }
1882 1936 goto done;
1883 1937 }
1884 1938
1885 1939 if (self->nt) {
1886 1940 nt_invalidate_added(self, start - self->length + 1);
1887 1941 if (self->ntrev > start)
1888 1942 self->ntrev = (int)start;
1889 1943 }
1890 1944 if (self->added)
1891 1945 ret = PyList_SetSlice(self->added, start - self->length + 1,
1892 1946 PyList_GET_SIZE(self->added), NULL);
1893 1947 done:
1894 1948 Py_CLEAR(self->headrevs);
1895 1949 return ret;
1896 1950 }
1897 1951
1898 1952 /*
1899 1953 * Supported ops:
1900 1954 *
1901 1955 * slice deletion
1902 1956 * string assignment (extend node->rev mapping)
1903 1957 * string deletion (shrink node->rev mapping)
1904 1958 */
1905 1959 static int index_assign_subscript(indexObject *self, PyObject *item,
1906 1960 PyObject *value)
1907 1961 {
1908 1962 char *node;
1909 1963 Py_ssize_t nodelen;
1910 1964 long rev;
1911 1965
1912 1966 if (PySlice_Check(item) && value == NULL)
1913 1967 return index_slice_del(self, item);
1914 1968
1915 1969 if (node_check(item, &node, &nodelen) == -1)
1916 1970 return -1;
1917 1971
1918 1972 if (value == NULL)
1919 1973 return self->nt ? nt_insert(self, node, -1) : 0;
1920 1974 rev = PyInt_AsLong(value);
1921 1975 if (rev > INT_MAX || rev < 0) {
1922 1976 if (!PyErr_Occurred())
1923 1977 PyErr_SetString(PyExc_ValueError, "rev out of range");
1924 1978 return -1;
1925 1979 }
1926 1980 return nt_insert(self, node, (int)rev);
1927 1981 }
1928 1982
1929 1983 /*
1930 1984 * Find all RevlogNG entries in an index that has inline data. Update
1931 1985 * the optional "offsets" table with those entries.
1932 1986 */
1933 1987 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
1934 1988 {
1935 1989 const char *data = PyString_AS_STRING(self->data);
1936 1990 Py_ssize_t pos = 0;
1937 1991 Py_ssize_t end = PyString_GET_SIZE(self->data);
1938 1992 long incr = v1_hdrsize;
1939 1993 Py_ssize_t len = 0;
1940 1994
1941 1995 while (pos + v1_hdrsize <= end && pos >= 0) {
1942 1996 uint32_t comp_len;
1943 1997 /* 3rd element of header is length of compressed inline data */
1944 1998 comp_len = getbe32(data + pos + 8);
1945 1999 incr = v1_hdrsize + comp_len;
1946 2000 if (offsets)
1947 2001 offsets[len] = data + pos;
1948 2002 len++;
1949 2003 pos += incr;
1950 2004 }
1951 2005
1952 2006 if (pos != end) {
1953 2007 if (!PyErr_Occurred())
1954 2008 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1955 2009 return -1;
1956 2010 }
1957 2011
1958 2012 return len;
1959 2013 }
1960 2014
1961 2015 static int index_init(indexObject *self, PyObject *args)
1962 2016 {
1963 2017 PyObject *data_obj, *inlined_obj;
1964 2018 Py_ssize_t size;
1965 2019
1966 2020 /* Initialize before argument-checking to avoid index_dealloc() crash. */
1967 2021 self->raw_length = 0;
1968 2022 self->added = NULL;
1969 2023 self->cache = NULL;
1970 2024 self->data = NULL;
1971 2025 self->headrevs = NULL;
1972 2026 self->filteredrevs = Py_None;
1973 2027 Py_INCREF(Py_None);
1974 2028 self->nt = NULL;
1975 2029 self->offsets = NULL;
1976 2030
1977 2031 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1978 2032 return -1;
1979 2033 if (!PyString_Check(data_obj)) {
1980 2034 PyErr_SetString(PyExc_TypeError, "data is not a string");
1981 2035 return -1;
1982 2036 }
1983 2037 size = PyString_GET_SIZE(data_obj);
1984 2038
1985 2039 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1986 2040 self->data = data_obj;
1987 2041
1988 2042 self->ntlength = self->ntcapacity = 0;
1989 2043 self->ntdepth = self->ntsplits = 0;
1990 2044 self->ntlookups = self->ntmisses = 0;
1991 2045 self->ntrev = -1;
1992 2046 Py_INCREF(self->data);
1993 2047
1994 2048 if (self->inlined) {
1995 2049 Py_ssize_t len = inline_scan(self, NULL);
1996 2050 if (len == -1)
1997 2051 goto bail;
1998 2052 self->raw_length = len;
1999 2053 self->length = len + 1;
2000 2054 } else {
2001 2055 if (size % v1_hdrsize) {
2002 2056 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2003 2057 goto bail;
2004 2058 }
2005 2059 self->raw_length = size / v1_hdrsize;
2006 2060 self->length = self->raw_length + 1;
2007 2061 }
2008 2062
2009 2063 return 0;
2010 2064 bail:
2011 2065 return -1;
2012 2066 }
2013 2067
2014 2068 static PyObject *index_nodemap(indexObject *self)
2015 2069 {
2016 2070 Py_INCREF(self);
2017 2071 return (PyObject *)self;
2018 2072 }
2019 2073
2020 2074 static void index_dealloc(indexObject *self)
2021 2075 {
2022 2076 _index_clearcaches(self);
2023 2077 Py_XDECREF(self->filteredrevs);
2024 2078 Py_XDECREF(self->data);
2025 2079 Py_XDECREF(self->added);
2026 2080 PyObject_Del(self);
2027 2081 }
2028 2082
2029 2083 static PySequenceMethods index_sequence_methods = {
2030 2084 (lenfunc)index_length, /* sq_length */
2031 2085 0, /* sq_concat */
2032 2086 0, /* sq_repeat */
2033 2087 (ssizeargfunc)index_get, /* sq_item */
2034 2088 0, /* sq_slice */
2035 2089 0, /* sq_ass_item */
2036 2090 0, /* sq_ass_slice */
2037 2091 (objobjproc)index_contains, /* sq_contains */
2038 2092 };
2039 2093
2040 2094 static PyMappingMethods index_mapping_methods = {
2041 2095 (lenfunc)index_length, /* mp_length */
2042 2096 (binaryfunc)index_getitem, /* mp_subscript */
2043 2097 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2044 2098 };
2045 2099
2046 2100 static PyMethodDef index_methods[] = {
2047 2101 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2048 2102 "return the gca set of the given revs"},
2049 2103 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2050 2104 METH_VARARGS,
2051 2105 "return the heads of the common ancestors of the given revs"},
2052 2106 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2053 2107 "clear the index caches"},
2054 2108 {"get", (PyCFunction)index_m_get, METH_VARARGS,
2055 2109 "get an index entry"},
2056 2110 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2057 2111 "get head revisions"},
2058 2112 {"insert", (PyCFunction)index_insert, METH_VARARGS,
2059 2113 "insert an index entry"},
2060 2114 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2061 2115 "match a potentially ambiguous node ID"},
2062 2116 {"stats", (PyCFunction)index_stats, METH_NOARGS,
2063 2117 "stats for the index"},
2064 2118 {NULL} /* Sentinel */
2065 2119 };
2066 2120
2067 2121 static PyGetSetDef index_getset[] = {
2068 2122 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2069 2123 {NULL} /* Sentinel */
2070 2124 };
2071 2125
2072 2126 static PyTypeObject indexType = {
2073 2127 PyObject_HEAD_INIT(NULL)
2074 2128 0, /* ob_size */
2075 2129 "parsers.index", /* tp_name */
2076 2130 sizeof(indexObject), /* tp_basicsize */
2077 2131 0, /* tp_itemsize */
2078 2132 (destructor)index_dealloc, /* tp_dealloc */
2079 2133 0, /* tp_print */
2080 2134 0, /* tp_getattr */
2081 2135 0, /* tp_setattr */
2082 2136 0, /* tp_compare */
2083 2137 0, /* tp_repr */
2084 2138 0, /* tp_as_number */
2085 2139 &index_sequence_methods, /* tp_as_sequence */
2086 2140 &index_mapping_methods, /* tp_as_mapping */
2087 2141 0, /* tp_hash */
2088 2142 0, /* tp_call */
2089 2143 0, /* tp_str */
2090 2144 0, /* tp_getattro */
2091 2145 0, /* tp_setattro */
2092 2146 0, /* tp_as_buffer */
2093 2147 Py_TPFLAGS_DEFAULT, /* tp_flags */
2094 2148 "revlog index", /* tp_doc */
2095 2149 0, /* tp_traverse */
2096 2150 0, /* tp_clear */
2097 2151 0, /* tp_richcompare */
2098 2152 0, /* tp_weaklistoffset */
2099 2153 0, /* tp_iter */
2100 2154 0, /* tp_iternext */
2101 2155 index_methods, /* tp_methods */
2102 2156 0, /* tp_members */
2103 2157 index_getset, /* tp_getset */
2104 2158 0, /* tp_base */
2105 2159 0, /* tp_dict */
2106 2160 0, /* tp_descr_get */
2107 2161 0, /* tp_descr_set */
2108 2162 0, /* tp_dictoffset */
2109 2163 (initproc)index_init, /* tp_init */
2110 2164 0, /* tp_alloc */
2111 2165 };
2112 2166
2113 2167 /*
2114 2168 * returns a tuple of the form (index, index, cache) with elements as
2115 2169 * follows:
2116 2170 *
2117 2171 * index: an index object that lazily parses RevlogNG records
2118 2172 * cache: if data is inlined, a tuple (index_file_content, 0), else None
2119 2173 *
2120 2174 * added complications are for backwards compatibility
2121 2175 */
2122 2176 static PyObject *parse_index2(PyObject *self, PyObject *args)
2123 2177 {
2124 2178 PyObject *tuple = NULL, *cache = NULL;
2125 2179 indexObject *idx;
2126 2180 int ret;
2127 2181
2128 2182 idx = PyObject_New(indexObject, &indexType);
2129 2183 if (idx == NULL)
2130 2184 goto bail;
2131 2185
2132 2186 ret = index_init(idx, args);
2133 2187 if (ret == -1)
2134 2188 goto bail;
2135 2189
2136 2190 if (idx->inlined) {
2137 2191 cache = Py_BuildValue("iO", 0, idx->data);
2138 2192 if (cache == NULL)
2139 2193 goto bail;
2140 2194 } else {
2141 2195 cache = Py_None;
2142 2196 Py_INCREF(cache);
2143 2197 }
2144 2198
2145 2199 tuple = Py_BuildValue("NN", idx, cache);
2146 2200 if (!tuple)
2147 2201 goto bail;
2148 2202 return tuple;
2149 2203
2150 2204 bail:
2151 2205 Py_XDECREF(idx);
2152 2206 Py_XDECREF(cache);
2153 2207 Py_XDECREF(tuple);
2154 2208 return NULL;
2155 2209 }
2156 2210
2157 2211 static char parsers_doc[] = "Efficient content parsing.";
2158 2212
2159 2213 PyObject *encodedir(PyObject *self, PyObject *args);
2160 2214 PyObject *pathencode(PyObject *self, PyObject *args);
2161 2215 PyObject *lowerencode(PyObject *self, PyObject *args);
2162 2216
2163 2217 static PyMethodDef methods[] = {
2164 2218 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
2165 2219 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
2166 2220 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
2167 2221 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
2222 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
2168 2223 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
2169 2224 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
2170 2225 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
2171 2226 {NULL, NULL}
2172 2227 };
2173 2228
2174 2229 void dirs_module_init(PyObject *mod);
2175 2230
2176 2231 static void module_init(PyObject *mod)
2177 2232 {
2178 2233 /* This module constant has two purposes. First, it lets us unit test
2179 2234 * the ImportError raised without hard-coding any error text. This
2180 2235 * means we can change the text in the future without breaking tests,
2181 2236 * even across changesets without a recompile. Second, its presence
2182 2237 * can be used to determine whether the version-checking logic is
2183 2238 * present, which also helps in testing across changesets without a
2184 2239 * recompile. Note that this means the pure-Python version of parsers
2185 2240 * should not have this module constant. */
2186 2241 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
2187 2242
2188 2243 dirs_module_init(mod);
2189 2244
2190 2245 indexType.tp_new = PyType_GenericNew;
2191 2246 if (PyType_Ready(&indexType) < 0 ||
2192 2247 PyType_Ready(&dirstateTupleType) < 0)
2193 2248 return;
2194 2249 Py_INCREF(&indexType);
2195 2250 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
2196 2251 Py_INCREF(&dirstateTupleType);
2197 2252 PyModule_AddObject(mod, "dirstatetuple",
2198 2253 (PyObject *)&dirstateTupleType);
2199 2254
2200 2255 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
2201 2256 -1, -1, -1, -1, nullid, 20);
2202 2257 if (nullentry)
2203 2258 PyObject_GC_UnTrack(nullentry);
2204 2259 }
2205 2260
2206 2261 static int check_python_version(void)
2207 2262 {
2208 2263 PyObject *sys = PyImport_ImportModule("sys");
2209 2264 long hexversion = PyInt_AsLong(PyObject_GetAttrString(sys, "hexversion"));
2210 2265 /* sys.hexversion is a 32-bit number by default, so the -1 case
2211 2266 * should only occur in unusual circumstances (e.g. if sys.hexversion
2212 2267 * is manually set to an invalid value). */
2213 2268 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
2214 2269 PyErr_Format(PyExc_ImportError, "%s: The Mercurial extension "
2215 2270 "modules were compiled with Python " PY_VERSION ", but "
2216 2271 "Mercurial is currently using Python with sys.hexversion=%ld: "
2217 2272 "Python %s\n at: %s", versionerrortext, hexversion,
2218 2273 Py_GetVersion(), Py_GetProgramFullPath());
2219 2274 return -1;
2220 2275 }
2221 2276 return 0;
2222 2277 }
2223 2278
2224 2279 #ifdef IS_PY3K
2225 2280 static struct PyModuleDef parsers_module = {
2226 2281 PyModuleDef_HEAD_INIT,
2227 2282 "parsers",
2228 2283 parsers_doc,
2229 2284 -1,
2230 2285 methods
2231 2286 };
2232 2287
2233 2288 PyMODINIT_FUNC PyInit_parsers(void)
2234 2289 {
2235 2290 PyObject *mod;
2236 2291
2237 2292 if (check_python_version() == -1)
2238 2293 return;
2239 2294 mod = PyModule_Create(&parsers_module);
2240 2295 module_init(mod);
2241 2296 return mod;
2242 2297 }
2243 2298 #else
2244 2299 PyMODINIT_FUNC initparsers(void)
2245 2300 {
2246 2301 PyObject *mod;
2247 2302
2248 2303 if (check_python_version() == -1)
2249 2304 return;
2250 2305 mod = Py_InitModule3("parsers", methods, parsers_doc);
2251 2306 module_init(mod);
2252 2307 }
2253 2308 #endif
General Comments 0
You need to be logged in to leave comments. Login now