##// END OF EJS Templates
parsers: add a function to efficiently lowercase ASCII strings...
Siddharth Agarwal -
r22778:80f2b63d default
parent child Browse files
Show More
@@ -1,423 +1,432 b''
1 # encoding.py - character transcoding support for Mercurial
1 # encoding.py - character transcoding support for Mercurial
2 #
2 #
3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import error
8 import error, parsers
9 import unicodedata, locale, os
9 import unicodedata, locale, os
10
10
11 def _getpreferredencoding():
11 def _getpreferredencoding():
12 '''
12 '''
13 On darwin, getpreferredencoding ignores the locale environment and
13 On darwin, getpreferredencoding ignores the locale environment and
14 always returns mac-roman. http://bugs.python.org/issue6202 fixes this
14 always returns mac-roman. http://bugs.python.org/issue6202 fixes this
15 for Python 2.7 and up. This is the same corrected code for earlier
15 for Python 2.7 and up. This is the same corrected code for earlier
16 Python versions.
16 Python versions.
17
17
18 However, we can't use a version check for this method, as some distributions
18 However, we can't use a version check for this method, as some distributions
19 patch Python to fix this. Instead, we use it as a 'fixer' for the mac-roman
19 patch Python to fix this. Instead, we use it as a 'fixer' for the mac-roman
20 encoding, as it is unlikely that this encoding is the actually expected.
20 encoding, as it is unlikely that this encoding is the actually expected.
21 '''
21 '''
22 try:
22 try:
23 locale.CODESET
23 locale.CODESET
24 except AttributeError:
24 except AttributeError:
25 # Fall back to parsing environment variables :-(
25 # Fall back to parsing environment variables :-(
26 return locale.getdefaultlocale()[1]
26 return locale.getdefaultlocale()[1]
27
27
28 oldloc = locale.setlocale(locale.LC_CTYPE)
28 oldloc = locale.setlocale(locale.LC_CTYPE)
29 locale.setlocale(locale.LC_CTYPE, "")
29 locale.setlocale(locale.LC_CTYPE, "")
30 result = locale.nl_langinfo(locale.CODESET)
30 result = locale.nl_langinfo(locale.CODESET)
31 locale.setlocale(locale.LC_CTYPE, oldloc)
31 locale.setlocale(locale.LC_CTYPE, oldloc)
32
32
33 return result
33 return result
34
34
35 _encodingfixers = {
35 _encodingfixers = {
36 '646': lambda: 'ascii',
36 '646': lambda: 'ascii',
37 'ANSI_X3.4-1968': lambda: 'ascii',
37 'ANSI_X3.4-1968': lambda: 'ascii',
38 'mac-roman': _getpreferredencoding
38 'mac-roman': _getpreferredencoding
39 }
39 }
40
40
41 try:
41 try:
42 encoding = os.environ.get("HGENCODING")
42 encoding = os.environ.get("HGENCODING")
43 if not encoding:
43 if not encoding:
44 encoding = locale.getpreferredencoding() or 'ascii'
44 encoding = locale.getpreferredencoding() or 'ascii'
45 encoding = _encodingfixers.get(encoding, lambda: encoding)()
45 encoding = _encodingfixers.get(encoding, lambda: encoding)()
46 except locale.Error:
46 except locale.Error:
47 encoding = 'ascii'
47 encoding = 'ascii'
48 encodingmode = os.environ.get("HGENCODINGMODE", "strict")
48 encodingmode = os.environ.get("HGENCODINGMODE", "strict")
49 fallbackencoding = 'ISO-8859-1'
49 fallbackencoding = 'ISO-8859-1'
50
50
51 class localstr(str):
51 class localstr(str):
52 '''This class allows strings that are unmodified to be
52 '''This class allows strings that are unmodified to be
53 round-tripped to the local encoding and back'''
53 round-tripped to the local encoding and back'''
54 def __new__(cls, u, l):
54 def __new__(cls, u, l):
55 s = str.__new__(cls, l)
55 s = str.__new__(cls, l)
56 s._utf8 = u
56 s._utf8 = u
57 return s
57 return s
58 def __hash__(self):
58 def __hash__(self):
59 return hash(self._utf8) # avoid collisions in local string space
59 return hash(self._utf8) # avoid collisions in local string space
60
60
61 def tolocal(s):
61 def tolocal(s):
62 """
62 """
63 Convert a string from internal UTF-8 to local encoding
63 Convert a string from internal UTF-8 to local encoding
64
64
65 All internal strings should be UTF-8 but some repos before the
65 All internal strings should be UTF-8 but some repos before the
66 implementation of locale support may contain latin1 or possibly
66 implementation of locale support may contain latin1 or possibly
67 other character sets. We attempt to decode everything strictly
67 other character sets. We attempt to decode everything strictly
68 using UTF-8, then Latin-1, and failing that, we use UTF-8 and
68 using UTF-8, then Latin-1, and failing that, we use UTF-8 and
69 replace unknown characters.
69 replace unknown characters.
70
70
71 The localstr class is used to cache the known UTF-8 encoding of
71 The localstr class is used to cache the known UTF-8 encoding of
72 strings next to their local representation to allow lossless
72 strings next to their local representation to allow lossless
73 round-trip conversion back to UTF-8.
73 round-trip conversion back to UTF-8.
74
74
75 >>> u = 'foo: \\xc3\\xa4' # utf-8
75 >>> u = 'foo: \\xc3\\xa4' # utf-8
76 >>> l = tolocal(u)
76 >>> l = tolocal(u)
77 >>> l
77 >>> l
78 'foo: ?'
78 'foo: ?'
79 >>> fromlocal(l)
79 >>> fromlocal(l)
80 'foo: \\xc3\\xa4'
80 'foo: \\xc3\\xa4'
81 >>> u2 = 'foo: \\xc3\\xa1'
81 >>> u2 = 'foo: \\xc3\\xa1'
82 >>> d = { l: 1, tolocal(u2): 2 }
82 >>> d = { l: 1, tolocal(u2): 2 }
83 >>> len(d) # no collision
83 >>> len(d) # no collision
84 2
84 2
85 >>> 'foo: ?' in d
85 >>> 'foo: ?' in d
86 False
86 False
87 >>> l1 = 'foo: \\xe4' # historical latin1 fallback
87 >>> l1 = 'foo: \\xe4' # historical latin1 fallback
88 >>> l = tolocal(l1)
88 >>> l = tolocal(l1)
89 >>> l
89 >>> l
90 'foo: ?'
90 'foo: ?'
91 >>> fromlocal(l) # magically in utf-8
91 >>> fromlocal(l) # magically in utf-8
92 'foo: \\xc3\\xa4'
92 'foo: \\xc3\\xa4'
93 """
93 """
94
94
95 try:
95 try:
96 try:
96 try:
97 # make sure string is actually stored in UTF-8
97 # make sure string is actually stored in UTF-8
98 u = s.decode('UTF-8')
98 u = s.decode('UTF-8')
99 if encoding == 'UTF-8':
99 if encoding == 'UTF-8':
100 # fast path
100 # fast path
101 return s
101 return s
102 r = u.encode(encoding, "replace")
102 r = u.encode(encoding, "replace")
103 if u == r.decode(encoding):
103 if u == r.decode(encoding):
104 # r is a safe, non-lossy encoding of s
104 # r is a safe, non-lossy encoding of s
105 return r
105 return r
106 return localstr(s, r)
106 return localstr(s, r)
107 except UnicodeDecodeError:
107 except UnicodeDecodeError:
108 # we should only get here if we're looking at an ancient changeset
108 # we should only get here if we're looking at an ancient changeset
109 try:
109 try:
110 u = s.decode(fallbackencoding)
110 u = s.decode(fallbackencoding)
111 r = u.encode(encoding, "replace")
111 r = u.encode(encoding, "replace")
112 if u == r.decode(encoding):
112 if u == r.decode(encoding):
113 # r is a safe, non-lossy encoding of s
113 # r is a safe, non-lossy encoding of s
114 return r
114 return r
115 return localstr(u.encode('UTF-8'), r)
115 return localstr(u.encode('UTF-8'), r)
116 except UnicodeDecodeError:
116 except UnicodeDecodeError:
117 u = s.decode("utf-8", "replace") # last ditch
117 u = s.decode("utf-8", "replace") # last ditch
118 return u.encode(encoding, "replace") # can't round-trip
118 return u.encode(encoding, "replace") # can't round-trip
119 except LookupError, k:
119 except LookupError, k:
120 raise error.Abort(k, hint="please check your locale settings")
120 raise error.Abort(k, hint="please check your locale settings")
121
121
122 def fromlocal(s):
122 def fromlocal(s):
123 """
123 """
124 Convert a string from the local character encoding to UTF-8
124 Convert a string from the local character encoding to UTF-8
125
125
126 We attempt to decode strings using the encoding mode set by
126 We attempt to decode strings using the encoding mode set by
127 HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
127 HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
128 characters will cause an error message. Other modes include
128 characters will cause an error message. Other modes include
129 'replace', which replaces unknown characters with a special
129 'replace', which replaces unknown characters with a special
130 Unicode character, and 'ignore', which drops the character.
130 Unicode character, and 'ignore', which drops the character.
131 """
131 """
132
132
133 # can we do a lossless round-trip?
133 # can we do a lossless round-trip?
134 if isinstance(s, localstr):
134 if isinstance(s, localstr):
135 return s._utf8
135 return s._utf8
136
136
137 try:
137 try:
138 return s.decode(encoding, encodingmode).encode("utf-8")
138 return s.decode(encoding, encodingmode).encode("utf-8")
139 except UnicodeDecodeError, inst:
139 except UnicodeDecodeError, inst:
140 sub = s[max(0, inst.start - 10):inst.start + 10]
140 sub = s[max(0, inst.start - 10):inst.start + 10]
141 raise error.Abort("decoding near '%s': %s!" % (sub, inst))
141 raise error.Abort("decoding near '%s': %s!" % (sub, inst))
142 except LookupError, k:
142 except LookupError, k:
143 raise error.Abort(k, hint="please check your locale settings")
143 raise error.Abort(k, hint="please check your locale settings")
144
144
145 # How to treat ambiguous-width characters. Set to 'wide' to treat as wide.
145 # How to treat ambiguous-width characters. Set to 'wide' to treat as wide.
146 wide = (os.environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide"
146 wide = (os.environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide"
147 and "WFA" or "WF")
147 and "WFA" or "WF")
148
148
149 def colwidth(s):
149 def colwidth(s):
150 "Find the column width of a string for display in the local encoding"
150 "Find the column width of a string for display in the local encoding"
151 return ucolwidth(s.decode(encoding, 'replace'))
151 return ucolwidth(s.decode(encoding, 'replace'))
152
152
153 def ucolwidth(d):
153 def ucolwidth(d):
154 "Find the column width of a Unicode string for display"
154 "Find the column width of a Unicode string for display"
155 eaw = getattr(unicodedata, 'east_asian_width', None)
155 eaw = getattr(unicodedata, 'east_asian_width', None)
156 if eaw is not None:
156 if eaw is not None:
157 return sum([eaw(c) in wide and 2 or 1 for c in d])
157 return sum([eaw(c) in wide and 2 or 1 for c in d])
158 return len(d)
158 return len(d)
159
159
160 def getcols(s, start, c):
160 def getcols(s, start, c):
161 '''Use colwidth to find a c-column substring of s starting at byte
161 '''Use colwidth to find a c-column substring of s starting at byte
162 index start'''
162 index start'''
163 for x in xrange(start + c, len(s)):
163 for x in xrange(start + c, len(s)):
164 t = s[start:x]
164 t = s[start:x]
165 if colwidth(t) == c:
165 if colwidth(t) == c:
166 return t
166 return t
167
167
168 def trim(s, width, ellipsis='', leftside=False):
168 def trim(s, width, ellipsis='', leftside=False):
169 """Trim string 's' to at most 'width' columns (including 'ellipsis').
169 """Trim string 's' to at most 'width' columns (including 'ellipsis').
170
170
171 If 'leftside' is True, left side of string 's' is trimmed.
171 If 'leftside' is True, left side of string 's' is trimmed.
172 'ellipsis' is always placed at trimmed side.
172 'ellipsis' is always placed at trimmed side.
173
173
174 >>> ellipsis = '+++'
174 >>> ellipsis = '+++'
175 >>> from mercurial import encoding
175 >>> from mercurial import encoding
176 >>> encoding.encoding = 'utf-8'
176 >>> encoding.encoding = 'utf-8'
177 >>> t= '1234567890'
177 >>> t= '1234567890'
178 >>> print trim(t, 12, ellipsis=ellipsis)
178 >>> print trim(t, 12, ellipsis=ellipsis)
179 1234567890
179 1234567890
180 >>> print trim(t, 10, ellipsis=ellipsis)
180 >>> print trim(t, 10, ellipsis=ellipsis)
181 1234567890
181 1234567890
182 >>> print trim(t, 8, ellipsis=ellipsis)
182 >>> print trim(t, 8, ellipsis=ellipsis)
183 12345+++
183 12345+++
184 >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
184 >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
185 +++67890
185 +++67890
186 >>> print trim(t, 8)
186 >>> print trim(t, 8)
187 12345678
187 12345678
188 >>> print trim(t, 8, leftside=True)
188 >>> print trim(t, 8, leftside=True)
189 34567890
189 34567890
190 >>> print trim(t, 3, ellipsis=ellipsis)
190 >>> print trim(t, 3, ellipsis=ellipsis)
191 +++
191 +++
192 >>> print trim(t, 1, ellipsis=ellipsis)
192 >>> print trim(t, 1, ellipsis=ellipsis)
193 +
193 +
194 >>> u = u'\u3042\u3044\u3046\u3048\u304a' # 2 x 5 = 10 columns
194 >>> u = u'\u3042\u3044\u3046\u3048\u304a' # 2 x 5 = 10 columns
195 >>> t = u.encode(encoding.encoding)
195 >>> t = u.encode(encoding.encoding)
196 >>> print trim(t, 12, ellipsis=ellipsis)
196 >>> print trim(t, 12, ellipsis=ellipsis)
197 \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a
197 \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a
198 >>> print trim(t, 10, ellipsis=ellipsis)
198 >>> print trim(t, 10, ellipsis=ellipsis)
199 \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a
199 \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a
200 >>> print trim(t, 8, ellipsis=ellipsis)
200 >>> print trim(t, 8, ellipsis=ellipsis)
201 \xe3\x81\x82\xe3\x81\x84+++
201 \xe3\x81\x82\xe3\x81\x84+++
202 >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
202 >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
203 +++\xe3\x81\x88\xe3\x81\x8a
203 +++\xe3\x81\x88\xe3\x81\x8a
204 >>> print trim(t, 5)
204 >>> print trim(t, 5)
205 \xe3\x81\x82\xe3\x81\x84
205 \xe3\x81\x82\xe3\x81\x84
206 >>> print trim(t, 5, leftside=True)
206 >>> print trim(t, 5, leftside=True)
207 \xe3\x81\x88\xe3\x81\x8a
207 \xe3\x81\x88\xe3\x81\x8a
208 >>> print trim(t, 4, ellipsis=ellipsis)
208 >>> print trim(t, 4, ellipsis=ellipsis)
209 +++
209 +++
210 >>> print trim(t, 4, ellipsis=ellipsis, leftside=True)
210 >>> print trim(t, 4, ellipsis=ellipsis, leftside=True)
211 +++
211 +++
212 >>> t = '\x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa' # invalid byte sequence
212 >>> t = '\x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa' # invalid byte sequence
213 >>> print trim(t, 12, ellipsis=ellipsis)
213 >>> print trim(t, 12, ellipsis=ellipsis)
214 \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
214 \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
215 >>> print trim(t, 10, ellipsis=ellipsis)
215 >>> print trim(t, 10, ellipsis=ellipsis)
216 \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
216 \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
217 >>> print trim(t, 8, ellipsis=ellipsis)
217 >>> print trim(t, 8, ellipsis=ellipsis)
218 \x11\x22\x33\x44\x55+++
218 \x11\x22\x33\x44\x55+++
219 >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
219 >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
220 +++\x66\x77\x88\x99\xaa
220 +++\x66\x77\x88\x99\xaa
221 >>> print trim(t, 8)
221 >>> print trim(t, 8)
222 \x11\x22\x33\x44\x55\x66\x77\x88
222 \x11\x22\x33\x44\x55\x66\x77\x88
223 >>> print trim(t, 8, leftside=True)
223 >>> print trim(t, 8, leftside=True)
224 \x33\x44\x55\x66\x77\x88\x99\xaa
224 \x33\x44\x55\x66\x77\x88\x99\xaa
225 >>> print trim(t, 3, ellipsis=ellipsis)
225 >>> print trim(t, 3, ellipsis=ellipsis)
226 +++
226 +++
227 >>> print trim(t, 1, ellipsis=ellipsis)
227 >>> print trim(t, 1, ellipsis=ellipsis)
228 +
228 +
229 """
229 """
230 try:
230 try:
231 u = s.decode(encoding)
231 u = s.decode(encoding)
232 except UnicodeDecodeError:
232 except UnicodeDecodeError:
233 if len(s) <= width: # trimming is not needed
233 if len(s) <= width: # trimming is not needed
234 return s
234 return s
235 width -= len(ellipsis)
235 width -= len(ellipsis)
236 if width <= 0: # no enough room even for ellipsis
236 if width <= 0: # no enough room even for ellipsis
237 return ellipsis[:width + len(ellipsis)]
237 return ellipsis[:width + len(ellipsis)]
238 if leftside:
238 if leftside:
239 return ellipsis + s[-width:]
239 return ellipsis + s[-width:]
240 return s[:width] + ellipsis
240 return s[:width] + ellipsis
241
241
242 if ucolwidth(u) <= width: # trimming is not needed
242 if ucolwidth(u) <= width: # trimming is not needed
243 return s
243 return s
244
244
245 width -= len(ellipsis)
245 width -= len(ellipsis)
246 if width <= 0: # no enough room even for ellipsis
246 if width <= 0: # no enough room even for ellipsis
247 return ellipsis[:width + len(ellipsis)]
247 return ellipsis[:width + len(ellipsis)]
248
248
249 if leftside:
249 if leftside:
250 uslice = lambda i: u[i:]
250 uslice = lambda i: u[i:]
251 concat = lambda s: ellipsis + s
251 concat = lambda s: ellipsis + s
252 else:
252 else:
253 uslice = lambda i: u[:-i]
253 uslice = lambda i: u[:-i]
254 concat = lambda s: s + ellipsis
254 concat = lambda s: s + ellipsis
255 for i in xrange(1, len(u)):
255 for i in xrange(1, len(u)):
256 usub = uslice(i)
256 usub = uslice(i)
257 if ucolwidth(usub) <= width:
257 if ucolwidth(usub) <= width:
258 return concat(usub.encode(encoding))
258 return concat(usub.encode(encoding))
259 return ellipsis # no enough room for multi-column characters
259 return ellipsis # no enough room for multi-column characters
260
260
261 def asciilower(s):
262 '''convert a string to lowercase if ASCII
263
264 Raises UnicodeDecodeError if non-ASCII characters are found.'''
265 s.decode('ascii')
266 return s.lower()
267
268 asciilower = getattr(parsers, 'asciilower', asciilower)
269
261 def lower(s):
270 def lower(s):
262 "best-effort encoding-aware case-folding of local string s"
271 "best-effort encoding-aware case-folding of local string s"
263 try:
272 try:
264 s.decode('ascii') # throw exception for non-ASCII character
273 s.decode('ascii') # throw exception for non-ASCII character
265 return s.lower()
274 return s.lower()
266 except UnicodeDecodeError:
275 except UnicodeDecodeError:
267 pass
276 pass
268 try:
277 try:
269 if isinstance(s, localstr):
278 if isinstance(s, localstr):
270 u = s._utf8.decode("utf-8")
279 u = s._utf8.decode("utf-8")
271 else:
280 else:
272 u = s.decode(encoding, encodingmode)
281 u = s.decode(encoding, encodingmode)
273
282
274 lu = u.lower()
283 lu = u.lower()
275 if u == lu:
284 if u == lu:
276 return s # preserve localstring
285 return s # preserve localstring
277 return lu.encode(encoding)
286 return lu.encode(encoding)
278 except UnicodeError:
287 except UnicodeError:
279 return s.lower() # we don't know how to fold this except in ASCII
288 return s.lower() # we don't know how to fold this except in ASCII
280 except LookupError, k:
289 except LookupError, k:
281 raise error.Abort(k, hint="please check your locale settings")
290 raise error.Abort(k, hint="please check your locale settings")
282
291
283 def upper(s):
292 def upper(s):
284 "best-effort encoding-aware case-folding of local string s"
293 "best-effort encoding-aware case-folding of local string s"
285 try:
294 try:
286 s.decode('ascii') # throw exception for non-ASCII character
295 s.decode('ascii') # throw exception for non-ASCII character
287 return s.upper()
296 return s.upper()
288 except UnicodeDecodeError:
297 except UnicodeDecodeError:
289 pass
298 pass
290 try:
299 try:
291 if isinstance(s, localstr):
300 if isinstance(s, localstr):
292 u = s._utf8.decode("utf-8")
301 u = s._utf8.decode("utf-8")
293 else:
302 else:
294 u = s.decode(encoding, encodingmode)
303 u = s.decode(encoding, encodingmode)
295
304
296 uu = u.upper()
305 uu = u.upper()
297 if u == uu:
306 if u == uu:
298 return s # preserve localstring
307 return s # preserve localstring
299 return uu.encode(encoding)
308 return uu.encode(encoding)
300 except UnicodeError:
309 except UnicodeError:
301 return s.upper() # we don't know how to fold this except in ASCII
310 return s.upper() # we don't know how to fold this except in ASCII
302 except LookupError, k:
311 except LookupError, k:
303 raise error.Abort(k, hint="please check your locale settings")
312 raise error.Abort(k, hint="please check your locale settings")
304
313
305 _jsonmap = {}
314 _jsonmap = {}
306
315
307 def jsonescape(s):
316 def jsonescape(s):
308 '''returns a string suitable for JSON
317 '''returns a string suitable for JSON
309
318
310 JSON is problematic for us because it doesn't support non-Unicode
319 JSON is problematic for us because it doesn't support non-Unicode
311 bytes. To deal with this, we take the following approach:
320 bytes. To deal with this, we take the following approach:
312
321
313 - localstr objects are converted back to UTF-8
322 - localstr objects are converted back to UTF-8
314 - valid UTF-8/ASCII strings are passed as-is
323 - valid UTF-8/ASCII strings are passed as-is
315 - other strings are converted to UTF-8b surrogate encoding
324 - other strings are converted to UTF-8b surrogate encoding
316 - apply JSON-specified string escaping
325 - apply JSON-specified string escaping
317
326
318 (escapes are doubled in these tests)
327 (escapes are doubled in these tests)
319
328
320 >>> jsonescape('this is a test')
329 >>> jsonescape('this is a test')
321 'this is a test'
330 'this is a test'
322 >>> jsonescape('escape characters: \\0 \\x0b \\t \\n \\r \\" \\\\')
331 >>> jsonescape('escape characters: \\0 \\x0b \\t \\n \\r \\" \\\\')
323 'escape characters: \\\\u0000 \\\\u000b \\\\t \\\\n \\\\r \\\\" \\\\\\\\'
332 'escape characters: \\\\u0000 \\\\u000b \\\\t \\\\n \\\\r \\\\" \\\\\\\\'
324 >>> jsonescape('a weird byte: \\xdd')
333 >>> jsonescape('a weird byte: \\xdd')
325 'a weird byte: \\xed\\xb3\\x9d'
334 'a weird byte: \\xed\\xb3\\x9d'
326 >>> jsonescape('utf-8: caf\\xc3\\xa9')
335 >>> jsonescape('utf-8: caf\\xc3\\xa9')
327 'utf-8: caf\\xc3\\xa9'
336 'utf-8: caf\\xc3\\xa9'
328 >>> jsonescape('')
337 >>> jsonescape('')
329 ''
338 ''
330 '''
339 '''
331
340
332 if not _jsonmap:
341 if not _jsonmap:
333 for x in xrange(32):
342 for x in xrange(32):
334 _jsonmap[chr(x)] = "\u%04x" %x
343 _jsonmap[chr(x)] = "\u%04x" %x
335 for x in xrange(32, 256):
344 for x in xrange(32, 256):
336 c = chr(x)
345 c = chr(x)
337 _jsonmap[c] = c
346 _jsonmap[c] = c
338 _jsonmap['\t'] = '\\t'
347 _jsonmap['\t'] = '\\t'
339 _jsonmap['\n'] = '\\n'
348 _jsonmap['\n'] = '\\n'
340 _jsonmap['\"'] = '\\"'
349 _jsonmap['\"'] = '\\"'
341 _jsonmap['\\'] = '\\\\'
350 _jsonmap['\\'] = '\\\\'
342 _jsonmap['\b'] = '\\b'
351 _jsonmap['\b'] = '\\b'
343 _jsonmap['\f'] = '\\f'
352 _jsonmap['\f'] = '\\f'
344 _jsonmap['\r'] = '\\r'
353 _jsonmap['\r'] = '\\r'
345
354
346 return ''.join(_jsonmap[c] for c in toutf8b(s))
355 return ''.join(_jsonmap[c] for c in toutf8b(s))
347
356
348 def toutf8b(s):
357 def toutf8b(s):
349 '''convert a local, possibly-binary string into UTF-8b
358 '''convert a local, possibly-binary string into UTF-8b
350
359
351 This is intended as a generic method to preserve data when working
360 This is intended as a generic method to preserve data when working
352 with schemes like JSON and XML that have no provision for
361 with schemes like JSON and XML that have no provision for
353 arbitrary byte strings. As Mercurial often doesn't know
362 arbitrary byte strings. As Mercurial often doesn't know
354 what encoding data is in, we use so-called UTF-8b.
363 what encoding data is in, we use so-called UTF-8b.
355
364
356 If a string is already valid UTF-8 (or ASCII), it passes unmodified.
365 If a string is already valid UTF-8 (or ASCII), it passes unmodified.
357 Otherwise, unsupported bytes are mapped to UTF-16 surrogate range,
366 Otherwise, unsupported bytes are mapped to UTF-16 surrogate range,
358 uDC00-uDCFF.
367 uDC00-uDCFF.
359
368
360 Principles of operation:
369 Principles of operation:
361
370
362 - ASCII and UTF-8 data successfully round-trips and is understood
371 - ASCII and UTF-8 data successfully round-trips and is understood
363 by Unicode-oriented clients
372 by Unicode-oriented clients
364 - filenames and file contents in arbitrary other encodings can have
373 - filenames and file contents in arbitrary other encodings can have
365 be round-tripped or recovered by clueful clients
374 be round-tripped or recovered by clueful clients
366 - local strings that have a cached known UTF-8 encoding (aka
375 - local strings that have a cached known UTF-8 encoding (aka
367 localstr) get sent as UTF-8 so Unicode-oriented clients get the
376 localstr) get sent as UTF-8 so Unicode-oriented clients get the
368 Unicode data they want
377 Unicode data they want
369 - because we must preserve UTF-8 bytestring in places such as
378 - because we must preserve UTF-8 bytestring in places such as
370 filenames, metadata can't be roundtripped without help
379 filenames, metadata can't be roundtripped without help
371
380
372 (Note: "UTF-8b" often refers to decoding a mix of valid UTF-8 and
381 (Note: "UTF-8b" often refers to decoding a mix of valid UTF-8 and
373 arbitrary bytes into an internal Unicode format that can be
382 arbitrary bytes into an internal Unicode format that can be
374 re-encoded back into the original. Here we are exposing the
383 re-encoded back into the original. Here we are exposing the
375 internal surrogate encoding as a UTF-8 string.)
384 internal surrogate encoding as a UTF-8 string.)
376 '''
385 '''
377
386
378 if isinstance(s, localstr):
387 if isinstance(s, localstr):
379 return s._utf8
388 return s._utf8
380
389
381 try:
390 try:
382 s.decode('utf-8')
391 s.decode('utf-8')
383 return s
392 return s
384 except UnicodeDecodeError:
393 except UnicodeDecodeError:
385 # surrogate-encode any characters that don't round-trip
394 # surrogate-encode any characters that don't round-trip
386 s2 = s.decode('utf-8', 'ignore').encode('utf-8')
395 s2 = s.decode('utf-8', 'ignore').encode('utf-8')
387 r = ""
396 r = ""
388 pos = 0
397 pos = 0
389 for c in s:
398 for c in s:
390 if s2[pos:pos + 1] == c:
399 if s2[pos:pos + 1] == c:
391 r += c
400 r += c
392 pos += 1
401 pos += 1
393 else:
402 else:
394 r += unichr(0xdc00 + ord(c)).encode('utf-8')
403 r += unichr(0xdc00 + ord(c)).encode('utf-8')
395 return r
404 return r
396
405
397 def fromutf8b(s):
406 def fromutf8b(s):
398 '''Given a UTF-8b string, return a local, possibly-binary string.
407 '''Given a UTF-8b string, return a local, possibly-binary string.
399
408
400 return the original binary string. This
409 return the original binary string. This
401 is a round-trip process for strings like filenames, but metadata
410 is a round-trip process for strings like filenames, but metadata
402 that's was passed through tolocal will remain in UTF-8.
411 that's was passed through tolocal will remain in UTF-8.
403
412
404 >>> m = "\\xc3\\xa9\\x99abcd"
413 >>> m = "\\xc3\\xa9\\x99abcd"
405 >>> n = toutf8b(m)
414 >>> n = toutf8b(m)
406 >>> n
415 >>> n
407 '\\xc3\\xa9\\xed\\xb2\\x99abcd'
416 '\\xc3\\xa9\\xed\\xb2\\x99abcd'
408 >>> fromutf8b(n) == m
417 >>> fromutf8b(n) == m
409 True
418 True
410 '''
419 '''
411
420
412 # fast path - look for uDxxx prefixes in s
421 # fast path - look for uDxxx prefixes in s
413 if "\xed" not in s:
422 if "\xed" not in s:
414 return s
423 return s
415
424
416 u = s.decode("utf-8")
425 u = s.decode("utf-8")
417 r = ""
426 r = ""
418 for c in u:
427 for c in u:
419 if ord(c) & 0xff00 == 0xdc00:
428 if ord(c) & 0xff00 == 0xdc00:
420 r += chr(ord(c) & 0xff)
429 r += chr(ord(c) & 0xff)
421 else:
430 else:
422 r += c.encode("utf-8")
431 r += c.encode("utf-8")
423 return r
432 return r
@@ -1,2253 +1,2308 b''
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #include <Python.h>
10 #include <Python.h>
11 #include <ctype.h>
11 #include <ctype.h>
12 #include <stddef.h>
12 #include <stddef.h>
13 #include <string.h>
13 #include <string.h>
14
14
15 #include "util.h"
15 #include "util.h"
16
16
17 static char *versionerrortext = "Python minor version mismatch";
17 static char *versionerrortext = "Python minor version mismatch";
18
18
19 static int8_t hextable[256] = {
19 static int8_t hextable[256] = {
20 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
20 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
21 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
21 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
22 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
22 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
23 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */
23 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */
24 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
24 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
25 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
25 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
26 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
26 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
27 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
27 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
28 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
28 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
29 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
29 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
30 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
30 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
31 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
31 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
32 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
32 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
33 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
33 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
34 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
34 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
35 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
35 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
36 };
36 };
37
37
38 static char lowertable[128] = {
39 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
40 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
41 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
42 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
43 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
44 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
45 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
46 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
47 '\x40',
48 '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */
49 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */
50 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */
51 '\x78', '\x79', '\x7a', /* X-Z */
52 '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
53 '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
54 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
55 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
56 '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
57 };
58
38 static inline int hexdigit(const char *p, Py_ssize_t off)
59 static inline int hexdigit(const char *p, Py_ssize_t off)
39 {
60 {
40 int8_t val = hextable[(unsigned char)p[off]];
61 int8_t val = hextable[(unsigned char)p[off]];
41
62
42 if (val >= 0) {
63 if (val >= 0) {
43 return val;
64 return val;
44 }
65 }
45
66
46 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
67 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
47 return 0;
68 return 0;
48 }
69 }
49
70
50 /*
71 /*
51 * Turn a hex-encoded string into binary.
72 * Turn a hex-encoded string into binary.
52 */
73 */
53 static PyObject *unhexlify(const char *str, int len)
74 static PyObject *unhexlify(const char *str, int len)
54 {
75 {
55 PyObject *ret;
76 PyObject *ret;
56 char *d;
77 char *d;
57 int i;
78 int i;
58
79
59 ret = PyBytes_FromStringAndSize(NULL, len / 2);
80 ret = PyBytes_FromStringAndSize(NULL, len / 2);
60
81
61 if (!ret)
82 if (!ret)
62 return NULL;
83 return NULL;
63
84
64 d = PyBytes_AsString(ret);
85 d = PyBytes_AsString(ret);
65
86
66 for (i = 0; i < len;) {
87 for (i = 0; i < len;) {
67 int hi = hexdigit(str, i++);
88 int hi = hexdigit(str, i++);
68 int lo = hexdigit(str, i++);
89 int lo = hexdigit(str, i++);
69 *d++ = (hi << 4) | lo;
90 *d++ = (hi << 4) | lo;
70 }
91 }
71
92
72 return ret;
93 return ret;
73 }
94 }
74
95
96 static PyObject *asciilower(PyObject *self, PyObject *args)
97 {
98 char *str, *newstr;
99 int i, len;
100 PyObject *newobj = NULL;
101
102 if (!PyArg_ParseTuple(args, "s#", &str, &len))
103 goto quit;
104
105 newobj = PyBytes_FromStringAndSize(NULL, len);
106 if (!newobj)
107 goto quit;
108
109 newstr = PyBytes_AS_STRING(newobj);
110
111 for (i = 0; i < len; i++) {
112 char c = str[i];
113 if (c & 0x80) {
114 PyObject *err = PyUnicodeDecodeError_Create(
115 "ascii", str, len, i, (i + 1),
116 "unexpected code byte");
117 PyErr_SetObject(PyExc_UnicodeDecodeError, err);
118 goto quit;
119 }
120 newstr[i] = lowertable[(unsigned char)c];
121 }
122
123 return newobj;
124 quit:
125 Py_XDECREF(newobj);
126 return NULL;
127 }
128
75 /*
129 /*
76 * This code assumes that a manifest is stitched together with newline
130 * This code assumes that a manifest is stitched together with newline
77 * ('\n') characters.
131 * ('\n') characters.
78 */
132 */
79 static PyObject *parse_manifest(PyObject *self, PyObject *args)
133 static PyObject *parse_manifest(PyObject *self, PyObject *args)
80 {
134 {
81 PyObject *mfdict, *fdict;
135 PyObject *mfdict, *fdict;
82 char *str, *start, *end;
136 char *str, *start, *end;
83 int len;
137 int len;
84
138
85 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
139 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
86 &PyDict_Type, &mfdict,
140 &PyDict_Type, &mfdict,
87 &PyDict_Type, &fdict,
141 &PyDict_Type, &fdict,
88 &str, &len))
142 &str, &len))
89 goto quit;
143 goto quit;
90
144
91 start = str;
145 start = str;
92 end = str + len;
146 end = str + len;
93 while (start < end) {
147 while (start < end) {
94 PyObject *file = NULL, *node = NULL;
148 PyObject *file = NULL, *node = NULL;
95 PyObject *flags = NULL;
149 PyObject *flags = NULL;
96 char *zero = NULL, *newline = NULL;
150 char *zero = NULL, *newline = NULL;
97 ptrdiff_t nlen;
151 ptrdiff_t nlen;
98
152
99 zero = memchr(start, '\0', end - start);
153 zero = memchr(start, '\0', end - start);
100 if (!zero) {
154 if (!zero) {
101 PyErr_SetString(PyExc_ValueError,
155 PyErr_SetString(PyExc_ValueError,
102 "manifest entry has no separator");
156 "manifest entry has no separator");
103 goto quit;
157 goto quit;
104 }
158 }
105
159
106 newline = memchr(zero + 1, '\n', end - (zero + 1));
160 newline = memchr(zero + 1, '\n', end - (zero + 1));
107 if (!newline) {
161 if (!newline) {
108 PyErr_SetString(PyExc_ValueError,
162 PyErr_SetString(PyExc_ValueError,
109 "manifest contains trailing garbage");
163 "manifest contains trailing garbage");
110 goto quit;
164 goto quit;
111 }
165 }
112
166
113 file = PyBytes_FromStringAndSize(start, zero - start);
167 file = PyBytes_FromStringAndSize(start, zero - start);
114
168
115 if (!file)
169 if (!file)
116 goto bail;
170 goto bail;
117
171
118 nlen = newline - zero - 1;
172 nlen = newline - zero - 1;
119
173
120 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
174 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
121 if (!node)
175 if (!node)
122 goto bail;
176 goto bail;
123
177
124 if (nlen > 40) {
178 if (nlen > 40) {
125 flags = PyBytes_FromStringAndSize(zero + 41,
179 flags = PyBytes_FromStringAndSize(zero + 41,
126 nlen - 40);
180 nlen - 40);
127 if (!flags)
181 if (!flags)
128 goto bail;
182 goto bail;
129
183
130 if (PyDict_SetItem(fdict, file, flags) == -1)
184 if (PyDict_SetItem(fdict, file, flags) == -1)
131 goto bail;
185 goto bail;
132 }
186 }
133
187
134 if (PyDict_SetItem(mfdict, file, node) == -1)
188 if (PyDict_SetItem(mfdict, file, node) == -1)
135 goto bail;
189 goto bail;
136
190
137 start = newline + 1;
191 start = newline + 1;
138
192
139 Py_XDECREF(flags);
193 Py_XDECREF(flags);
140 Py_XDECREF(node);
194 Py_XDECREF(node);
141 Py_XDECREF(file);
195 Py_XDECREF(file);
142 continue;
196 continue;
143 bail:
197 bail:
144 Py_XDECREF(flags);
198 Py_XDECREF(flags);
145 Py_XDECREF(node);
199 Py_XDECREF(node);
146 Py_XDECREF(file);
200 Py_XDECREF(file);
147 goto quit;
201 goto quit;
148 }
202 }
149
203
150 Py_INCREF(Py_None);
204 Py_INCREF(Py_None);
151 return Py_None;
205 return Py_None;
152 quit:
206 quit:
153 return NULL;
207 return NULL;
154 }
208 }
155
209
156 static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
210 static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
157 int size, int mtime)
211 int size, int mtime)
158 {
212 {
159 dirstateTupleObject *t = PyObject_New(dirstateTupleObject,
213 dirstateTupleObject *t = PyObject_New(dirstateTupleObject,
160 &dirstateTupleType);
214 &dirstateTupleType);
161 if (!t)
215 if (!t)
162 return NULL;
216 return NULL;
163 t->state = state;
217 t->state = state;
164 t->mode = mode;
218 t->mode = mode;
165 t->size = size;
219 t->size = size;
166 t->mtime = mtime;
220 t->mtime = mtime;
167 return t;
221 return t;
168 }
222 }
169
223
170 static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
224 static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
171 PyObject *kwds)
225 PyObject *kwds)
172 {
226 {
173 /* We do all the initialization here and not a tp_init function because
227 /* We do all the initialization here and not a tp_init function because
174 * dirstate_tuple is immutable. */
228 * dirstate_tuple is immutable. */
175 dirstateTupleObject *t;
229 dirstateTupleObject *t;
176 char state;
230 char state;
177 int size, mode, mtime;
231 int size, mode, mtime;
178 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
232 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
179 return NULL;
233 return NULL;
180
234
181 t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
235 t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
182 if (!t)
236 if (!t)
183 return NULL;
237 return NULL;
184 t->state = state;
238 t->state = state;
185 t->mode = mode;
239 t->mode = mode;
186 t->size = size;
240 t->size = size;
187 t->mtime = mtime;
241 t->mtime = mtime;
188
242
189 return (PyObject *)t;
243 return (PyObject *)t;
190 }
244 }
191
245
192 static void dirstate_tuple_dealloc(PyObject *o)
246 static void dirstate_tuple_dealloc(PyObject *o)
193 {
247 {
194 PyObject_Del(o);
248 PyObject_Del(o);
195 }
249 }
196
250
197 static Py_ssize_t dirstate_tuple_length(PyObject *o)
251 static Py_ssize_t dirstate_tuple_length(PyObject *o)
198 {
252 {
199 return 4;
253 return 4;
200 }
254 }
201
255
202 static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
256 static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
203 {
257 {
204 dirstateTupleObject *t = (dirstateTupleObject *)o;
258 dirstateTupleObject *t = (dirstateTupleObject *)o;
205 switch (i) {
259 switch (i) {
206 case 0:
260 case 0:
207 return PyBytes_FromStringAndSize(&t->state, 1);
261 return PyBytes_FromStringAndSize(&t->state, 1);
208 case 1:
262 case 1:
209 return PyInt_FromLong(t->mode);
263 return PyInt_FromLong(t->mode);
210 case 2:
264 case 2:
211 return PyInt_FromLong(t->size);
265 return PyInt_FromLong(t->size);
212 case 3:
266 case 3:
213 return PyInt_FromLong(t->mtime);
267 return PyInt_FromLong(t->mtime);
214 default:
268 default:
215 PyErr_SetString(PyExc_IndexError, "index out of range");
269 PyErr_SetString(PyExc_IndexError, "index out of range");
216 return NULL;
270 return NULL;
217 }
271 }
218 }
272 }
219
273
220 static PySequenceMethods dirstate_tuple_sq = {
274 static PySequenceMethods dirstate_tuple_sq = {
221 dirstate_tuple_length, /* sq_length */
275 dirstate_tuple_length, /* sq_length */
222 0, /* sq_concat */
276 0, /* sq_concat */
223 0, /* sq_repeat */
277 0, /* sq_repeat */
224 dirstate_tuple_item, /* sq_item */
278 dirstate_tuple_item, /* sq_item */
225 0, /* sq_ass_item */
279 0, /* sq_ass_item */
226 0, /* sq_contains */
280 0, /* sq_contains */
227 0, /* sq_inplace_concat */
281 0, /* sq_inplace_concat */
228 0 /* sq_inplace_repeat */
282 0 /* sq_inplace_repeat */
229 };
283 };
230
284
231 PyTypeObject dirstateTupleType = {
285 PyTypeObject dirstateTupleType = {
232 PyVarObject_HEAD_INIT(NULL, 0)
286 PyVarObject_HEAD_INIT(NULL, 0)
233 "dirstate_tuple", /* tp_name */
287 "dirstate_tuple", /* tp_name */
234 sizeof(dirstateTupleObject),/* tp_basicsize */
288 sizeof(dirstateTupleObject),/* tp_basicsize */
235 0, /* tp_itemsize */
289 0, /* tp_itemsize */
236 (destructor)dirstate_tuple_dealloc, /* tp_dealloc */
290 (destructor)dirstate_tuple_dealloc, /* tp_dealloc */
237 0, /* tp_print */
291 0, /* tp_print */
238 0, /* tp_getattr */
292 0, /* tp_getattr */
239 0, /* tp_setattr */
293 0, /* tp_setattr */
240 0, /* tp_compare */
294 0, /* tp_compare */
241 0, /* tp_repr */
295 0, /* tp_repr */
242 0, /* tp_as_number */
296 0, /* tp_as_number */
243 &dirstate_tuple_sq, /* tp_as_sequence */
297 &dirstate_tuple_sq, /* tp_as_sequence */
244 0, /* tp_as_mapping */
298 0, /* tp_as_mapping */
245 0, /* tp_hash */
299 0, /* tp_hash */
246 0, /* tp_call */
300 0, /* tp_call */
247 0, /* tp_str */
301 0, /* tp_str */
248 0, /* tp_getattro */
302 0, /* tp_getattro */
249 0, /* tp_setattro */
303 0, /* tp_setattro */
250 0, /* tp_as_buffer */
304 0, /* tp_as_buffer */
251 Py_TPFLAGS_DEFAULT, /* tp_flags */
305 Py_TPFLAGS_DEFAULT, /* tp_flags */
252 "dirstate tuple", /* tp_doc */
306 "dirstate tuple", /* tp_doc */
253 0, /* tp_traverse */
307 0, /* tp_traverse */
254 0, /* tp_clear */
308 0, /* tp_clear */
255 0, /* tp_richcompare */
309 0, /* tp_richcompare */
256 0, /* tp_weaklistoffset */
310 0, /* tp_weaklistoffset */
257 0, /* tp_iter */
311 0, /* tp_iter */
258 0, /* tp_iternext */
312 0, /* tp_iternext */
259 0, /* tp_methods */
313 0, /* tp_methods */
260 0, /* tp_members */
314 0, /* tp_members */
261 0, /* tp_getset */
315 0, /* tp_getset */
262 0, /* tp_base */
316 0, /* tp_base */
263 0, /* tp_dict */
317 0, /* tp_dict */
264 0, /* tp_descr_get */
318 0, /* tp_descr_get */
265 0, /* tp_descr_set */
319 0, /* tp_descr_set */
266 0, /* tp_dictoffset */
320 0, /* tp_dictoffset */
267 0, /* tp_init */
321 0, /* tp_init */
268 0, /* tp_alloc */
322 0, /* tp_alloc */
269 dirstate_tuple_new, /* tp_new */
323 dirstate_tuple_new, /* tp_new */
270 };
324 };
271
325
272 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
326 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
273 {
327 {
274 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
328 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
275 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
329 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
276 char state, *cur, *str, *cpos;
330 char state, *cur, *str, *cpos;
277 int mode, size, mtime;
331 int mode, size, mtime;
278 unsigned int flen, len, pos = 40;
332 unsigned int flen, len, pos = 40;
279 int readlen;
333 int readlen;
280
334
281 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
335 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
282 &PyDict_Type, &dmap,
336 &PyDict_Type, &dmap,
283 &PyDict_Type, &cmap,
337 &PyDict_Type, &cmap,
284 &str, &readlen))
338 &str, &readlen))
285 goto quit;
339 goto quit;
286
340
287 if (readlen < 0)
341 if (readlen < 0)
288 goto quit;
342 goto quit;
289
343
290 len = readlen;
344 len = readlen;
291
345
292 /* read parents */
346 /* read parents */
293 if (len < 40)
347 if (len < 40)
294 goto quit;
348 goto quit;
295
349
296 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
350 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
297 if (!parents)
351 if (!parents)
298 goto quit;
352 goto quit;
299
353
300 /* read filenames */
354 /* read filenames */
301 while (pos >= 40 && pos < len) {
355 while (pos >= 40 && pos < len) {
302 cur = str + pos;
356 cur = str + pos;
303 /* unpack header */
357 /* unpack header */
304 state = *cur;
358 state = *cur;
305 mode = getbe32(cur + 1);
359 mode = getbe32(cur + 1);
306 size = getbe32(cur + 5);
360 size = getbe32(cur + 5);
307 mtime = getbe32(cur + 9);
361 mtime = getbe32(cur + 9);
308 flen = getbe32(cur + 13);
362 flen = getbe32(cur + 13);
309 pos += 17;
363 pos += 17;
310 cur += 17;
364 cur += 17;
311 if (flen > len - pos) {
365 if (flen > len - pos) {
312 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
366 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
313 goto quit;
367 goto quit;
314 }
368 }
315
369
316 entry = (PyObject *)make_dirstate_tuple(state, mode, size,
370 entry = (PyObject *)make_dirstate_tuple(state, mode, size,
317 mtime);
371 mtime);
318 cpos = memchr(cur, 0, flen);
372 cpos = memchr(cur, 0, flen);
319 if (cpos) {
373 if (cpos) {
320 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
374 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
321 cname = PyBytes_FromStringAndSize(cpos + 1,
375 cname = PyBytes_FromStringAndSize(cpos + 1,
322 flen - (cpos - cur) - 1);
376 flen - (cpos - cur) - 1);
323 if (!fname || !cname ||
377 if (!fname || !cname ||
324 PyDict_SetItem(cmap, fname, cname) == -1 ||
378 PyDict_SetItem(cmap, fname, cname) == -1 ||
325 PyDict_SetItem(dmap, fname, entry) == -1)
379 PyDict_SetItem(dmap, fname, entry) == -1)
326 goto quit;
380 goto quit;
327 Py_DECREF(cname);
381 Py_DECREF(cname);
328 } else {
382 } else {
329 fname = PyBytes_FromStringAndSize(cur, flen);
383 fname = PyBytes_FromStringAndSize(cur, flen);
330 if (!fname ||
384 if (!fname ||
331 PyDict_SetItem(dmap, fname, entry) == -1)
385 PyDict_SetItem(dmap, fname, entry) == -1)
332 goto quit;
386 goto quit;
333 }
387 }
334 Py_DECREF(fname);
388 Py_DECREF(fname);
335 Py_DECREF(entry);
389 Py_DECREF(entry);
336 fname = cname = entry = NULL;
390 fname = cname = entry = NULL;
337 pos += flen;
391 pos += flen;
338 }
392 }
339
393
340 ret = parents;
394 ret = parents;
341 Py_INCREF(ret);
395 Py_INCREF(ret);
342 quit:
396 quit:
343 Py_XDECREF(fname);
397 Py_XDECREF(fname);
344 Py_XDECREF(cname);
398 Py_XDECREF(cname);
345 Py_XDECREF(entry);
399 Py_XDECREF(entry);
346 Py_XDECREF(parents);
400 Py_XDECREF(parents);
347 return ret;
401 return ret;
348 }
402 }
349
403
350 /*
404 /*
351 * Efficiently pack a dirstate object into its on-disk format.
405 * Efficiently pack a dirstate object into its on-disk format.
352 */
406 */
353 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
407 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
354 {
408 {
355 PyObject *packobj = NULL;
409 PyObject *packobj = NULL;
356 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
410 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
357 Py_ssize_t nbytes, pos, l;
411 Py_ssize_t nbytes, pos, l;
358 PyObject *k, *v, *pn;
412 PyObject *k, *v, *pn;
359 char *p, *s;
413 char *p, *s;
360 double now;
414 double now;
361
415
362 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
416 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
363 &PyDict_Type, &map, &PyDict_Type, &copymap,
417 &PyDict_Type, &map, &PyDict_Type, &copymap,
364 &pl, &now))
418 &pl, &now))
365 return NULL;
419 return NULL;
366
420
367 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
421 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
368 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
422 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
369 return NULL;
423 return NULL;
370 }
424 }
371
425
372 /* Figure out how much we need to allocate. */
426 /* Figure out how much we need to allocate. */
373 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
427 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
374 PyObject *c;
428 PyObject *c;
375 if (!PyString_Check(k)) {
429 if (!PyString_Check(k)) {
376 PyErr_SetString(PyExc_TypeError, "expected string key");
430 PyErr_SetString(PyExc_TypeError, "expected string key");
377 goto bail;
431 goto bail;
378 }
432 }
379 nbytes += PyString_GET_SIZE(k) + 17;
433 nbytes += PyString_GET_SIZE(k) + 17;
380 c = PyDict_GetItem(copymap, k);
434 c = PyDict_GetItem(copymap, k);
381 if (c) {
435 if (c) {
382 if (!PyString_Check(c)) {
436 if (!PyString_Check(c)) {
383 PyErr_SetString(PyExc_TypeError,
437 PyErr_SetString(PyExc_TypeError,
384 "expected string key");
438 "expected string key");
385 goto bail;
439 goto bail;
386 }
440 }
387 nbytes += PyString_GET_SIZE(c) + 1;
441 nbytes += PyString_GET_SIZE(c) + 1;
388 }
442 }
389 }
443 }
390
444
391 packobj = PyString_FromStringAndSize(NULL, nbytes);
445 packobj = PyString_FromStringAndSize(NULL, nbytes);
392 if (packobj == NULL)
446 if (packobj == NULL)
393 goto bail;
447 goto bail;
394
448
395 p = PyString_AS_STRING(packobj);
449 p = PyString_AS_STRING(packobj);
396
450
397 pn = PySequence_ITEM(pl, 0);
451 pn = PySequence_ITEM(pl, 0);
398 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
452 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
399 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
453 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
400 goto bail;
454 goto bail;
401 }
455 }
402 memcpy(p, s, l);
456 memcpy(p, s, l);
403 p += 20;
457 p += 20;
404 pn = PySequence_ITEM(pl, 1);
458 pn = PySequence_ITEM(pl, 1);
405 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
459 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
406 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
460 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
407 goto bail;
461 goto bail;
408 }
462 }
409 memcpy(p, s, l);
463 memcpy(p, s, l);
410 p += 20;
464 p += 20;
411
465
412 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
466 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
413 dirstateTupleObject *tuple;
467 dirstateTupleObject *tuple;
414 char state;
468 char state;
415 uint32_t mode, size, mtime;
469 uint32_t mode, size, mtime;
416 Py_ssize_t len, l;
470 Py_ssize_t len, l;
417 PyObject *o;
471 PyObject *o;
418 char *t;
472 char *t;
419
473
420 if (!dirstate_tuple_check(v)) {
474 if (!dirstate_tuple_check(v)) {
421 PyErr_SetString(PyExc_TypeError,
475 PyErr_SetString(PyExc_TypeError,
422 "expected a dirstate tuple");
476 "expected a dirstate tuple");
423 goto bail;
477 goto bail;
424 }
478 }
425 tuple = (dirstateTupleObject *)v;
479 tuple = (dirstateTupleObject *)v;
426
480
427 state = tuple->state;
481 state = tuple->state;
428 mode = tuple->mode;
482 mode = tuple->mode;
429 size = tuple->size;
483 size = tuple->size;
430 mtime = tuple->mtime;
484 mtime = tuple->mtime;
431 if (state == 'n' && mtime == (uint32_t)now) {
485 if (state == 'n' && mtime == (uint32_t)now) {
432 /* See pure/parsers.py:pack_dirstate for why we do
486 /* See pure/parsers.py:pack_dirstate for why we do
433 * this. */
487 * this. */
434 mtime = -1;
488 mtime = -1;
435 mtime_unset = (PyObject *)make_dirstate_tuple(
489 mtime_unset = (PyObject *)make_dirstate_tuple(
436 state, mode, size, mtime);
490 state, mode, size, mtime);
437 if (!mtime_unset)
491 if (!mtime_unset)
438 goto bail;
492 goto bail;
439 if (PyDict_SetItem(map, k, mtime_unset) == -1)
493 if (PyDict_SetItem(map, k, mtime_unset) == -1)
440 goto bail;
494 goto bail;
441 Py_DECREF(mtime_unset);
495 Py_DECREF(mtime_unset);
442 mtime_unset = NULL;
496 mtime_unset = NULL;
443 }
497 }
444 *p++ = state;
498 *p++ = state;
445 putbe32(mode, p);
499 putbe32(mode, p);
446 putbe32(size, p + 4);
500 putbe32(size, p + 4);
447 putbe32(mtime, p + 8);
501 putbe32(mtime, p + 8);
448 t = p + 12;
502 t = p + 12;
449 p += 16;
503 p += 16;
450 len = PyString_GET_SIZE(k);
504 len = PyString_GET_SIZE(k);
451 memcpy(p, PyString_AS_STRING(k), len);
505 memcpy(p, PyString_AS_STRING(k), len);
452 p += len;
506 p += len;
453 o = PyDict_GetItem(copymap, k);
507 o = PyDict_GetItem(copymap, k);
454 if (o) {
508 if (o) {
455 *p++ = '\0';
509 *p++ = '\0';
456 l = PyString_GET_SIZE(o);
510 l = PyString_GET_SIZE(o);
457 memcpy(p, PyString_AS_STRING(o), l);
511 memcpy(p, PyString_AS_STRING(o), l);
458 p += l;
512 p += l;
459 len += l + 1;
513 len += l + 1;
460 }
514 }
461 putbe32((uint32_t)len, t);
515 putbe32((uint32_t)len, t);
462 }
516 }
463
517
464 pos = p - PyString_AS_STRING(packobj);
518 pos = p - PyString_AS_STRING(packobj);
465 if (pos != nbytes) {
519 if (pos != nbytes) {
466 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
520 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
467 (long)pos, (long)nbytes);
521 (long)pos, (long)nbytes);
468 goto bail;
522 goto bail;
469 }
523 }
470
524
471 return packobj;
525 return packobj;
472 bail:
526 bail:
473 Py_XDECREF(mtime_unset);
527 Py_XDECREF(mtime_unset);
474 Py_XDECREF(packobj);
528 Py_XDECREF(packobj);
475 return NULL;
529 return NULL;
476 }
530 }
477
531
478 /*
532 /*
479 * A base-16 trie for fast node->rev mapping.
533 * A base-16 trie for fast node->rev mapping.
480 *
534 *
481 * Positive value is index of the next node in the trie
535 * Positive value is index of the next node in the trie
482 * Negative value is a leaf: -(rev + 1)
536 * Negative value is a leaf: -(rev + 1)
483 * Zero is empty
537 * Zero is empty
484 */
538 */
485 typedef struct {
539 typedef struct {
486 int children[16];
540 int children[16];
487 } nodetree;
541 } nodetree;
488
542
489 /*
543 /*
490 * This class has two behaviours.
544 * This class has two behaviours.
491 *
545 *
492 * When used in a list-like way (with integer keys), we decode an
546 * When used in a list-like way (with integer keys), we decode an
493 * entry in a RevlogNG index file on demand. Our last entry is a
547 * entry in a RevlogNG index file on demand. Our last entry is a
494 * sentinel, always a nullid. We have limited support for
548 * sentinel, always a nullid. We have limited support for
495 * integer-keyed insert and delete, only at elements right before the
549 * integer-keyed insert and delete, only at elements right before the
496 * sentinel.
550 * sentinel.
497 *
551 *
498 * With string keys, we lazily perform a reverse mapping from node to
552 * With string keys, we lazily perform a reverse mapping from node to
499 * rev, using a base-16 trie.
553 * rev, using a base-16 trie.
500 */
554 */
501 typedef struct {
555 typedef struct {
502 PyObject_HEAD
556 PyObject_HEAD
503 /* Type-specific fields go here. */
557 /* Type-specific fields go here. */
504 PyObject *data; /* raw bytes of index */
558 PyObject *data; /* raw bytes of index */
505 PyObject **cache; /* cached tuples */
559 PyObject **cache; /* cached tuples */
506 const char **offsets; /* populated on demand */
560 const char **offsets; /* populated on demand */
507 Py_ssize_t raw_length; /* original number of elements */
561 Py_ssize_t raw_length; /* original number of elements */
508 Py_ssize_t length; /* current number of elements */
562 Py_ssize_t length; /* current number of elements */
509 PyObject *added; /* populated on demand */
563 PyObject *added; /* populated on demand */
510 PyObject *headrevs; /* cache, invalidated on changes */
564 PyObject *headrevs; /* cache, invalidated on changes */
511 PyObject *filteredrevs;/* filtered revs set */
565 PyObject *filteredrevs;/* filtered revs set */
512 nodetree *nt; /* base-16 trie */
566 nodetree *nt; /* base-16 trie */
513 int ntlength; /* # nodes in use */
567 int ntlength; /* # nodes in use */
514 int ntcapacity; /* # nodes allocated */
568 int ntcapacity; /* # nodes allocated */
515 int ntdepth; /* maximum depth of tree */
569 int ntdepth; /* maximum depth of tree */
516 int ntsplits; /* # splits performed */
570 int ntsplits; /* # splits performed */
517 int ntrev; /* last rev scanned */
571 int ntrev; /* last rev scanned */
518 int ntlookups; /* # lookups */
572 int ntlookups; /* # lookups */
519 int ntmisses; /* # lookups that miss the cache */
573 int ntmisses; /* # lookups that miss the cache */
520 int inlined;
574 int inlined;
521 } indexObject;
575 } indexObject;
522
576
523 static Py_ssize_t index_length(const indexObject *self)
577 static Py_ssize_t index_length(const indexObject *self)
524 {
578 {
525 if (self->added == NULL)
579 if (self->added == NULL)
526 return self->length;
580 return self->length;
527 return self->length + PyList_GET_SIZE(self->added);
581 return self->length + PyList_GET_SIZE(self->added);
528 }
582 }
529
583
530 static PyObject *nullentry;
584 static PyObject *nullentry;
531 static const char nullid[20];
585 static const char nullid[20];
532
586
533 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
587 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
534
588
535 #if LONG_MAX == 0x7fffffffL
589 #if LONG_MAX == 0x7fffffffL
536 static char *tuple_format = "Kiiiiiis#";
590 static char *tuple_format = "Kiiiiiis#";
537 #else
591 #else
538 static char *tuple_format = "kiiiiiis#";
592 static char *tuple_format = "kiiiiiis#";
539 #endif
593 #endif
540
594
541 /* A RevlogNG v1 index entry is 64 bytes long. */
595 /* A RevlogNG v1 index entry is 64 bytes long. */
542 static const long v1_hdrsize = 64;
596 static const long v1_hdrsize = 64;
543
597
544 /*
598 /*
545 * Return a pointer to the beginning of a RevlogNG record.
599 * Return a pointer to the beginning of a RevlogNG record.
546 */
600 */
547 static const char *index_deref(indexObject *self, Py_ssize_t pos)
601 static const char *index_deref(indexObject *self, Py_ssize_t pos)
548 {
602 {
549 if (self->inlined && pos > 0) {
603 if (self->inlined && pos > 0) {
550 if (self->offsets == NULL) {
604 if (self->offsets == NULL) {
551 self->offsets = malloc(self->raw_length *
605 self->offsets = malloc(self->raw_length *
552 sizeof(*self->offsets));
606 sizeof(*self->offsets));
553 if (self->offsets == NULL)
607 if (self->offsets == NULL)
554 return (const char *)PyErr_NoMemory();
608 return (const char *)PyErr_NoMemory();
555 inline_scan(self, self->offsets);
609 inline_scan(self, self->offsets);
556 }
610 }
557 return self->offsets[pos];
611 return self->offsets[pos];
558 }
612 }
559
613
560 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
614 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
561 }
615 }
562
616
563 /*
617 /*
564 * RevlogNG format (all in big endian, data may be inlined):
618 * RevlogNG format (all in big endian, data may be inlined):
565 * 6 bytes: offset
619 * 6 bytes: offset
566 * 2 bytes: flags
620 * 2 bytes: flags
567 * 4 bytes: compressed length
621 * 4 bytes: compressed length
568 * 4 bytes: uncompressed length
622 * 4 bytes: uncompressed length
569 * 4 bytes: base revision
623 * 4 bytes: base revision
570 * 4 bytes: link revision
624 * 4 bytes: link revision
571 * 4 bytes: parent 1 revision
625 * 4 bytes: parent 1 revision
572 * 4 bytes: parent 2 revision
626 * 4 bytes: parent 2 revision
573 * 32 bytes: nodeid (only 20 bytes used)
627 * 32 bytes: nodeid (only 20 bytes used)
574 */
628 */
575 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
629 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
576 {
630 {
577 uint64_t offset_flags;
631 uint64_t offset_flags;
578 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
632 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
579 const char *c_node_id;
633 const char *c_node_id;
580 const char *data;
634 const char *data;
581 Py_ssize_t length = index_length(self);
635 Py_ssize_t length = index_length(self);
582 PyObject *entry;
636 PyObject *entry;
583
637
584 if (pos < 0)
638 if (pos < 0)
585 pos += length;
639 pos += length;
586
640
587 if (pos < 0 || pos >= length) {
641 if (pos < 0 || pos >= length) {
588 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
642 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
589 return NULL;
643 return NULL;
590 }
644 }
591
645
592 if (pos == length - 1) {
646 if (pos == length - 1) {
593 Py_INCREF(nullentry);
647 Py_INCREF(nullentry);
594 return nullentry;
648 return nullentry;
595 }
649 }
596
650
597 if (pos >= self->length - 1) {
651 if (pos >= self->length - 1) {
598 PyObject *obj;
652 PyObject *obj;
599 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
653 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
600 Py_INCREF(obj);
654 Py_INCREF(obj);
601 return obj;
655 return obj;
602 }
656 }
603
657
604 if (self->cache) {
658 if (self->cache) {
605 if (self->cache[pos]) {
659 if (self->cache[pos]) {
606 Py_INCREF(self->cache[pos]);
660 Py_INCREF(self->cache[pos]);
607 return self->cache[pos];
661 return self->cache[pos];
608 }
662 }
609 } else {
663 } else {
610 self->cache = calloc(self->raw_length, sizeof(PyObject *));
664 self->cache = calloc(self->raw_length, sizeof(PyObject *));
611 if (self->cache == NULL)
665 if (self->cache == NULL)
612 return PyErr_NoMemory();
666 return PyErr_NoMemory();
613 }
667 }
614
668
615 data = index_deref(self, pos);
669 data = index_deref(self, pos);
616 if (data == NULL)
670 if (data == NULL)
617 return NULL;
671 return NULL;
618
672
619 offset_flags = getbe32(data + 4);
673 offset_flags = getbe32(data + 4);
620 if (pos == 0) /* mask out version number for the first entry */
674 if (pos == 0) /* mask out version number for the first entry */
621 offset_flags &= 0xFFFF;
675 offset_flags &= 0xFFFF;
622 else {
676 else {
623 uint32_t offset_high = getbe32(data);
677 uint32_t offset_high = getbe32(data);
624 offset_flags |= ((uint64_t)offset_high) << 32;
678 offset_flags |= ((uint64_t)offset_high) << 32;
625 }
679 }
626
680
627 comp_len = getbe32(data + 8);
681 comp_len = getbe32(data + 8);
628 uncomp_len = getbe32(data + 12);
682 uncomp_len = getbe32(data + 12);
629 base_rev = getbe32(data + 16);
683 base_rev = getbe32(data + 16);
630 link_rev = getbe32(data + 20);
684 link_rev = getbe32(data + 20);
631 parent_1 = getbe32(data + 24);
685 parent_1 = getbe32(data + 24);
632 parent_2 = getbe32(data + 28);
686 parent_2 = getbe32(data + 28);
633 c_node_id = data + 32;
687 c_node_id = data + 32;
634
688
635 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
689 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
636 uncomp_len, base_rev, link_rev,
690 uncomp_len, base_rev, link_rev,
637 parent_1, parent_2, c_node_id, 20);
691 parent_1, parent_2, c_node_id, 20);
638
692
639 if (entry) {
693 if (entry) {
640 PyObject_GC_UnTrack(entry);
694 PyObject_GC_UnTrack(entry);
641 Py_INCREF(entry);
695 Py_INCREF(entry);
642 }
696 }
643
697
644 self->cache[pos] = entry;
698 self->cache[pos] = entry;
645
699
646 return entry;
700 return entry;
647 }
701 }
648
702
649 /*
703 /*
650 * Return the 20-byte SHA of the node corresponding to the given rev.
704 * Return the 20-byte SHA of the node corresponding to the given rev.
651 */
705 */
652 static const char *index_node(indexObject *self, Py_ssize_t pos)
706 static const char *index_node(indexObject *self, Py_ssize_t pos)
653 {
707 {
654 Py_ssize_t length = index_length(self);
708 Py_ssize_t length = index_length(self);
655 const char *data;
709 const char *data;
656
710
657 if (pos == length - 1 || pos == INT_MAX)
711 if (pos == length - 1 || pos == INT_MAX)
658 return nullid;
712 return nullid;
659
713
660 if (pos >= length)
714 if (pos >= length)
661 return NULL;
715 return NULL;
662
716
663 if (pos >= self->length - 1) {
717 if (pos >= self->length - 1) {
664 PyObject *tuple, *str;
718 PyObject *tuple, *str;
665 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
719 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
666 str = PyTuple_GetItem(tuple, 7);
720 str = PyTuple_GetItem(tuple, 7);
667 return str ? PyString_AS_STRING(str) : NULL;
721 return str ? PyString_AS_STRING(str) : NULL;
668 }
722 }
669
723
670 data = index_deref(self, pos);
724 data = index_deref(self, pos);
671 return data ? data + 32 : NULL;
725 return data ? data + 32 : NULL;
672 }
726 }
673
727
674 static int nt_insert(indexObject *self, const char *node, int rev);
728 static int nt_insert(indexObject *self, const char *node, int rev);
675
729
676 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
730 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
677 {
731 {
678 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
732 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
679 return -1;
733 return -1;
680 if (*nodelen == 20)
734 if (*nodelen == 20)
681 return 0;
735 return 0;
682 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
736 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
683 return -1;
737 return -1;
684 }
738 }
685
739
686 static PyObject *index_insert(indexObject *self, PyObject *args)
740 static PyObject *index_insert(indexObject *self, PyObject *args)
687 {
741 {
688 PyObject *obj;
742 PyObject *obj;
689 char *node;
743 char *node;
690 int index;
744 int index;
691 Py_ssize_t len, nodelen;
745 Py_ssize_t len, nodelen;
692
746
693 if (!PyArg_ParseTuple(args, "iO", &index, &obj))
747 if (!PyArg_ParseTuple(args, "iO", &index, &obj))
694 return NULL;
748 return NULL;
695
749
696 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
750 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
697 PyErr_SetString(PyExc_TypeError, "8-tuple required");
751 PyErr_SetString(PyExc_TypeError, "8-tuple required");
698 return NULL;
752 return NULL;
699 }
753 }
700
754
701 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
755 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
702 return NULL;
756 return NULL;
703
757
704 len = index_length(self);
758 len = index_length(self);
705
759
706 if (index < 0)
760 if (index < 0)
707 index += len;
761 index += len;
708
762
709 if (index != len - 1) {
763 if (index != len - 1) {
710 PyErr_SetString(PyExc_IndexError,
764 PyErr_SetString(PyExc_IndexError,
711 "insert only supported at index -1");
765 "insert only supported at index -1");
712 return NULL;
766 return NULL;
713 }
767 }
714
768
715 if (self->added == NULL) {
769 if (self->added == NULL) {
716 self->added = PyList_New(0);
770 self->added = PyList_New(0);
717 if (self->added == NULL)
771 if (self->added == NULL)
718 return NULL;
772 return NULL;
719 }
773 }
720
774
721 if (PyList_Append(self->added, obj) == -1)
775 if (PyList_Append(self->added, obj) == -1)
722 return NULL;
776 return NULL;
723
777
724 if (self->nt)
778 if (self->nt)
725 nt_insert(self, node, index);
779 nt_insert(self, node, index);
726
780
727 Py_CLEAR(self->headrevs);
781 Py_CLEAR(self->headrevs);
728 Py_RETURN_NONE;
782 Py_RETURN_NONE;
729 }
783 }
730
784
731 static void _index_clearcaches(indexObject *self)
785 static void _index_clearcaches(indexObject *self)
732 {
786 {
733 if (self->cache) {
787 if (self->cache) {
734 Py_ssize_t i;
788 Py_ssize_t i;
735
789
736 for (i = 0; i < self->raw_length; i++)
790 for (i = 0; i < self->raw_length; i++)
737 Py_CLEAR(self->cache[i]);
791 Py_CLEAR(self->cache[i]);
738 free(self->cache);
792 free(self->cache);
739 self->cache = NULL;
793 self->cache = NULL;
740 }
794 }
741 if (self->offsets) {
795 if (self->offsets) {
742 free(self->offsets);
796 free(self->offsets);
743 self->offsets = NULL;
797 self->offsets = NULL;
744 }
798 }
745 if (self->nt) {
799 if (self->nt) {
746 free(self->nt);
800 free(self->nt);
747 self->nt = NULL;
801 self->nt = NULL;
748 }
802 }
749 Py_CLEAR(self->headrevs);
803 Py_CLEAR(self->headrevs);
750 }
804 }
751
805
752 static PyObject *index_clearcaches(indexObject *self)
806 static PyObject *index_clearcaches(indexObject *self)
753 {
807 {
754 _index_clearcaches(self);
808 _index_clearcaches(self);
755 self->ntlength = self->ntcapacity = 0;
809 self->ntlength = self->ntcapacity = 0;
756 self->ntdepth = self->ntsplits = 0;
810 self->ntdepth = self->ntsplits = 0;
757 self->ntrev = -1;
811 self->ntrev = -1;
758 self->ntlookups = self->ntmisses = 0;
812 self->ntlookups = self->ntmisses = 0;
759 Py_RETURN_NONE;
813 Py_RETURN_NONE;
760 }
814 }
761
815
762 static PyObject *index_stats(indexObject *self)
816 static PyObject *index_stats(indexObject *self)
763 {
817 {
764 PyObject *obj = PyDict_New();
818 PyObject *obj = PyDict_New();
765
819
766 if (obj == NULL)
820 if (obj == NULL)
767 return NULL;
821 return NULL;
768
822
769 #define istat(__n, __d) \
823 #define istat(__n, __d) \
770 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
824 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
771 goto bail;
825 goto bail;
772
826
773 if (self->added) {
827 if (self->added) {
774 Py_ssize_t len = PyList_GET_SIZE(self->added);
828 Py_ssize_t len = PyList_GET_SIZE(self->added);
775 if (PyDict_SetItemString(obj, "index entries added",
829 if (PyDict_SetItemString(obj, "index entries added",
776 PyInt_FromSsize_t(len)) == -1)
830 PyInt_FromSsize_t(len)) == -1)
777 goto bail;
831 goto bail;
778 }
832 }
779
833
780 if (self->raw_length != self->length - 1)
834 if (self->raw_length != self->length - 1)
781 istat(raw_length, "revs on disk");
835 istat(raw_length, "revs on disk");
782 istat(length, "revs in memory");
836 istat(length, "revs in memory");
783 istat(ntcapacity, "node trie capacity");
837 istat(ntcapacity, "node trie capacity");
784 istat(ntdepth, "node trie depth");
838 istat(ntdepth, "node trie depth");
785 istat(ntlength, "node trie count");
839 istat(ntlength, "node trie count");
786 istat(ntlookups, "node trie lookups");
840 istat(ntlookups, "node trie lookups");
787 istat(ntmisses, "node trie misses");
841 istat(ntmisses, "node trie misses");
788 istat(ntrev, "node trie last rev scanned");
842 istat(ntrev, "node trie last rev scanned");
789 istat(ntsplits, "node trie splits");
843 istat(ntsplits, "node trie splits");
790
844
791 #undef istat
845 #undef istat
792
846
793 return obj;
847 return obj;
794
848
795 bail:
849 bail:
796 Py_XDECREF(obj);
850 Py_XDECREF(obj);
797 return NULL;
851 return NULL;
798 }
852 }
799
853
800 /*
854 /*
801 * When we cache a list, we want to be sure the caller can't mutate
855 * When we cache a list, we want to be sure the caller can't mutate
802 * the cached copy.
856 * the cached copy.
803 */
857 */
804 static PyObject *list_copy(PyObject *list)
858 static PyObject *list_copy(PyObject *list)
805 {
859 {
806 Py_ssize_t len = PyList_GET_SIZE(list);
860 Py_ssize_t len = PyList_GET_SIZE(list);
807 PyObject *newlist = PyList_New(len);
861 PyObject *newlist = PyList_New(len);
808 Py_ssize_t i;
862 Py_ssize_t i;
809
863
810 if (newlist == NULL)
864 if (newlist == NULL)
811 return NULL;
865 return NULL;
812
866
813 for (i = 0; i < len; i++) {
867 for (i = 0; i < len; i++) {
814 PyObject *obj = PyList_GET_ITEM(list, i);
868 PyObject *obj = PyList_GET_ITEM(list, i);
815 Py_INCREF(obj);
869 Py_INCREF(obj);
816 PyList_SET_ITEM(newlist, i, obj);
870 PyList_SET_ITEM(newlist, i, obj);
817 }
871 }
818
872
819 return newlist;
873 return newlist;
820 }
874 }
821
875
822 static int check_filter(PyObject *filter, Py_ssize_t arg) {
876 static int check_filter(PyObject *filter, Py_ssize_t arg) {
823 if (filter) {
877 if (filter) {
824 PyObject *arglist, *result;
878 PyObject *arglist, *result;
825 int isfiltered;
879 int isfiltered;
826
880
827 arglist = Py_BuildValue("(n)", arg);
881 arglist = Py_BuildValue("(n)", arg);
828 if (!arglist) {
882 if (!arglist) {
829 return -1;
883 return -1;
830 }
884 }
831
885
832 result = PyEval_CallObject(filter, arglist);
886 result = PyEval_CallObject(filter, arglist);
833 Py_DECREF(arglist);
887 Py_DECREF(arglist);
834 if (!result) {
888 if (!result) {
835 return -1;
889 return -1;
836 }
890 }
837
891
838 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
892 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
839 * same as this function, so we can just return it directly.*/
893 * same as this function, so we can just return it directly.*/
840 isfiltered = PyObject_IsTrue(result);
894 isfiltered = PyObject_IsTrue(result);
841 Py_DECREF(result);
895 Py_DECREF(result);
842 return isfiltered;
896 return isfiltered;
843 } else {
897 } else {
844 return 0;
898 return 0;
845 }
899 }
846 }
900 }
847
901
848 static PyObject *index_headrevs(indexObject *self, PyObject *args)
902 static PyObject *index_headrevs(indexObject *self, PyObject *args)
849 {
903 {
850 Py_ssize_t i, len, addlen;
904 Py_ssize_t i, len, addlen;
851 char *nothead = NULL;
905 char *nothead = NULL;
852 PyObject *heads = NULL;
906 PyObject *heads = NULL;
853 PyObject *filter = NULL;
907 PyObject *filter = NULL;
854 PyObject *filteredrevs = Py_None;
908 PyObject *filteredrevs = Py_None;
855
909
856 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
910 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
857 return NULL;
911 return NULL;
858 }
912 }
859
913
860 if (self->headrevs && filteredrevs == self->filteredrevs)
914 if (self->headrevs && filteredrevs == self->filteredrevs)
861 return list_copy(self->headrevs);
915 return list_copy(self->headrevs);
862
916
863 Py_DECREF(self->filteredrevs);
917 Py_DECREF(self->filteredrevs);
864 self->filteredrevs = filteredrevs;
918 self->filteredrevs = filteredrevs;
865 Py_INCREF(filteredrevs);
919 Py_INCREF(filteredrevs);
866
920
867 if (filteredrevs != Py_None) {
921 if (filteredrevs != Py_None) {
868 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
922 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
869 if (!filter) {
923 if (!filter) {
870 PyErr_SetString(PyExc_TypeError,
924 PyErr_SetString(PyExc_TypeError,
871 "filteredrevs has no attribute __contains__");
925 "filteredrevs has no attribute __contains__");
872 goto bail;
926 goto bail;
873 }
927 }
874 }
928 }
875
929
876 len = index_length(self) - 1;
930 len = index_length(self) - 1;
877 heads = PyList_New(0);
931 heads = PyList_New(0);
878 if (heads == NULL)
932 if (heads == NULL)
879 goto bail;
933 goto bail;
880 if (len == 0) {
934 if (len == 0) {
881 PyObject *nullid = PyInt_FromLong(-1);
935 PyObject *nullid = PyInt_FromLong(-1);
882 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
936 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
883 Py_XDECREF(nullid);
937 Py_XDECREF(nullid);
884 goto bail;
938 goto bail;
885 }
939 }
886 goto done;
940 goto done;
887 }
941 }
888
942
889 nothead = calloc(len, 1);
943 nothead = calloc(len, 1);
890 if (nothead == NULL)
944 if (nothead == NULL)
891 goto bail;
945 goto bail;
892
946
893 for (i = 0; i < self->raw_length; i++) {
947 for (i = 0; i < self->raw_length; i++) {
894 const char *data;
948 const char *data;
895 int parent_1, parent_2, isfiltered;
949 int parent_1, parent_2, isfiltered;
896
950
897 isfiltered = check_filter(filter, i);
951 isfiltered = check_filter(filter, i);
898 if (isfiltered == -1) {
952 if (isfiltered == -1) {
899 PyErr_SetString(PyExc_TypeError,
953 PyErr_SetString(PyExc_TypeError,
900 "unable to check filter");
954 "unable to check filter");
901 goto bail;
955 goto bail;
902 }
956 }
903
957
904 if (isfiltered) {
958 if (isfiltered) {
905 nothead[i] = 1;
959 nothead[i] = 1;
906 continue;
960 continue;
907 }
961 }
908
962
909 data = index_deref(self, i);
963 data = index_deref(self, i);
910 parent_1 = getbe32(data + 24);
964 parent_1 = getbe32(data + 24);
911 parent_2 = getbe32(data + 28);
965 parent_2 = getbe32(data + 28);
912
966
913 if (parent_1 >= 0)
967 if (parent_1 >= 0)
914 nothead[parent_1] = 1;
968 nothead[parent_1] = 1;
915 if (parent_2 >= 0)
969 if (parent_2 >= 0)
916 nothead[parent_2] = 1;
970 nothead[parent_2] = 1;
917 }
971 }
918
972
919 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
973 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
920
974
921 for (i = 0; i < addlen; i++) {
975 for (i = 0; i < addlen; i++) {
922 PyObject *rev = PyList_GET_ITEM(self->added, i);
976 PyObject *rev = PyList_GET_ITEM(self->added, i);
923 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
977 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
924 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
978 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
925 long parent_1, parent_2;
979 long parent_1, parent_2;
926 int isfiltered;
980 int isfiltered;
927
981
928 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
982 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
929 PyErr_SetString(PyExc_TypeError,
983 PyErr_SetString(PyExc_TypeError,
930 "revlog parents are invalid");
984 "revlog parents are invalid");
931 goto bail;
985 goto bail;
932 }
986 }
933
987
934 isfiltered = check_filter(filter, i);
988 isfiltered = check_filter(filter, i);
935 if (isfiltered == -1) {
989 if (isfiltered == -1) {
936 PyErr_SetString(PyExc_TypeError,
990 PyErr_SetString(PyExc_TypeError,
937 "unable to check filter");
991 "unable to check filter");
938 goto bail;
992 goto bail;
939 }
993 }
940
994
941 if (isfiltered) {
995 if (isfiltered) {
942 nothead[i] = 1;
996 nothead[i] = 1;
943 continue;
997 continue;
944 }
998 }
945
999
946 parent_1 = PyInt_AS_LONG(p1);
1000 parent_1 = PyInt_AS_LONG(p1);
947 parent_2 = PyInt_AS_LONG(p2);
1001 parent_2 = PyInt_AS_LONG(p2);
948 if (parent_1 >= 0)
1002 if (parent_1 >= 0)
949 nothead[parent_1] = 1;
1003 nothead[parent_1] = 1;
950 if (parent_2 >= 0)
1004 if (parent_2 >= 0)
951 nothead[parent_2] = 1;
1005 nothead[parent_2] = 1;
952 }
1006 }
953
1007
954 for (i = 0; i < len; i++) {
1008 for (i = 0; i < len; i++) {
955 PyObject *head;
1009 PyObject *head;
956
1010
957 if (nothead[i])
1011 if (nothead[i])
958 continue;
1012 continue;
959 head = PyInt_FromSsize_t(i);
1013 head = PyInt_FromSsize_t(i);
960 if (head == NULL || PyList_Append(heads, head) == -1) {
1014 if (head == NULL || PyList_Append(heads, head) == -1) {
961 Py_XDECREF(head);
1015 Py_XDECREF(head);
962 goto bail;
1016 goto bail;
963 }
1017 }
964 }
1018 }
965
1019
966 done:
1020 done:
967 self->headrevs = heads;
1021 self->headrevs = heads;
968 Py_XDECREF(filter);
1022 Py_XDECREF(filter);
969 free(nothead);
1023 free(nothead);
970 return list_copy(self->headrevs);
1024 return list_copy(self->headrevs);
971 bail:
1025 bail:
972 Py_XDECREF(filter);
1026 Py_XDECREF(filter);
973 Py_XDECREF(heads);
1027 Py_XDECREF(heads);
974 free(nothead);
1028 free(nothead);
975 return NULL;
1029 return NULL;
976 }
1030 }
977
1031
978 static inline int nt_level(const char *node, Py_ssize_t level)
1032 static inline int nt_level(const char *node, Py_ssize_t level)
979 {
1033 {
980 int v = node[level>>1];
1034 int v = node[level>>1];
981 if (!(level & 1))
1035 if (!(level & 1))
982 v >>= 4;
1036 v >>= 4;
983 return v & 0xf;
1037 return v & 0xf;
984 }
1038 }
985
1039
986 /*
1040 /*
987 * Return values:
1041 * Return values:
988 *
1042 *
989 * -4: match is ambiguous (multiple candidates)
1043 * -4: match is ambiguous (multiple candidates)
990 * -2: not found
1044 * -2: not found
991 * rest: valid rev
1045 * rest: valid rev
992 */
1046 */
993 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
1047 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
994 int hex)
1048 int hex)
995 {
1049 {
996 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1050 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
997 int level, maxlevel, off;
1051 int level, maxlevel, off;
998
1052
999 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
1053 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
1000 return -1;
1054 return -1;
1001
1055
1002 if (self->nt == NULL)
1056 if (self->nt == NULL)
1003 return -2;
1057 return -2;
1004
1058
1005 if (hex)
1059 if (hex)
1006 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
1060 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
1007 else
1061 else
1008 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
1062 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
1009
1063
1010 for (level = off = 0; level < maxlevel; level++) {
1064 for (level = off = 0; level < maxlevel; level++) {
1011 int k = getnybble(node, level);
1065 int k = getnybble(node, level);
1012 nodetree *n = &self->nt[off];
1066 nodetree *n = &self->nt[off];
1013 int v = n->children[k];
1067 int v = n->children[k];
1014
1068
1015 if (v < 0) {
1069 if (v < 0) {
1016 const char *n;
1070 const char *n;
1017 Py_ssize_t i;
1071 Py_ssize_t i;
1018
1072
1019 v = -v - 1;
1073 v = -v - 1;
1020 n = index_node(self, v);
1074 n = index_node(self, v);
1021 if (n == NULL)
1075 if (n == NULL)
1022 return -2;
1076 return -2;
1023 for (i = level; i < maxlevel; i++)
1077 for (i = level; i < maxlevel; i++)
1024 if (getnybble(node, i) != nt_level(n, i))
1078 if (getnybble(node, i) != nt_level(n, i))
1025 return -2;
1079 return -2;
1026 return v;
1080 return v;
1027 }
1081 }
1028 if (v == 0)
1082 if (v == 0)
1029 return -2;
1083 return -2;
1030 off = v;
1084 off = v;
1031 }
1085 }
1032 /* multiple matches against an ambiguous prefix */
1086 /* multiple matches against an ambiguous prefix */
1033 return -4;
1087 return -4;
1034 }
1088 }
1035
1089
1036 static int nt_new(indexObject *self)
1090 static int nt_new(indexObject *self)
1037 {
1091 {
1038 if (self->ntlength == self->ntcapacity) {
1092 if (self->ntlength == self->ntcapacity) {
1039 self->ntcapacity *= 2;
1093 self->ntcapacity *= 2;
1040 self->nt = realloc(self->nt,
1094 self->nt = realloc(self->nt,
1041 self->ntcapacity * sizeof(nodetree));
1095 self->ntcapacity * sizeof(nodetree));
1042 if (self->nt == NULL) {
1096 if (self->nt == NULL) {
1043 PyErr_SetString(PyExc_MemoryError, "out of memory");
1097 PyErr_SetString(PyExc_MemoryError, "out of memory");
1044 return -1;
1098 return -1;
1045 }
1099 }
1046 memset(&self->nt[self->ntlength], 0,
1100 memset(&self->nt[self->ntlength], 0,
1047 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
1101 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
1048 }
1102 }
1049 return self->ntlength++;
1103 return self->ntlength++;
1050 }
1104 }
1051
1105
1052 static int nt_insert(indexObject *self, const char *node, int rev)
1106 static int nt_insert(indexObject *self, const char *node, int rev)
1053 {
1107 {
1054 int level = 0;
1108 int level = 0;
1055 int off = 0;
1109 int off = 0;
1056
1110
1057 while (level < 40) {
1111 while (level < 40) {
1058 int k = nt_level(node, level);
1112 int k = nt_level(node, level);
1059 nodetree *n;
1113 nodetree *n;
1060 int v;
1114 int v;
1061
1115
1062 n = &self->nt[off];
1116 n = &self->nt[off];
1063 v = n->children[k];
1117 v = n->children[k];
1064
1118
1065 if (v == 0) {
1119 if (v == 0) {
1066 n->children[k] = -rev - 1;
1120 n->children[k] = -rev - 1;
1067 return 0;
1121 return 0;
1068 }
1122 }
1069 if (v < 0) {
1123 if (v < 0) {
1070 const char *oldnode = index_node(self, -v - 1);
1124 const char *oldnode = index_node(self, -v - 1);
1071 int noff;
1125 int noff;
1072
1126
1073 if (!oldnode || !memcmp(oldnode, node, 20)) {
1127 if (!oldnode || !memcmp(oldnode, node, 20)) {
1074 n->children[k] = -rev - 1;
1128 n->children[k] = -rev - 1;
1075 return 0;
1129 return 0;
1076 }
1130 }
1077 noff = nt_new(self);
1131 noff = nt_new(self);
1078 if (noff == -1)
1132 if (noff == -1)
1079 return -1;
1133 return -1;
1080 /* self->nt may have been changed by realloc */
1134 /* self->nt may have been changed by realloc */
1081 self->nt[off].children[k] = noff;
1135 self->nt[off].children[k] = noff;
1082 off = noff;
1136 off = noff;
1083 n = &self->nt[off];
1137 n = &self->nt[off];
1084 n->children[nt_level(oldnode, ++level)] = v;
1138 n->children[nt_level(oldnode, ++level)] = v;
1085 if (level > self->ntdepth)
1139 if (level > self->ntdepth)
1086 self->ntdepth = level;
1140 self->ntdepth = level;
1087 self->ntsplits += 1;
1141 self->ntsplits += 1;
1088 } else {
1142 } else {
1089 level += 1;
1143 level += 1;
1090 off = v;
1144 off = v;
1091 }
1145 }
1092 }
1146 }
1093
1147
1094 return -1;
1148 return -1;
1095 }
1149 }
1096
1150
1097 static int nt_init(indexObject *self)
1151 static int nt_init(indexObject *self)
1098 {
1152 {
1099 if (self->nt == NULL) {
1153 if (self->nt == NULL) {
1100 if (self->raw_length > INT_MAX) {
1154 if (self->raw_length > INT_MAX) {
1101 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
1155 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
1102 return -1;
1156 return -1;
1103 }
1157 }
1104 self->ntcapacity = self->raw_length < 4
1158 self->ntcapacity = self->raw_length < 4
1105 ? 4 : (int)self->raw_length / 2;
1159 ? 4 : (int)self->raw_length / 2;
1106
1160
1107 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
1161 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
1108 if (self->nt == NULL) {
1162 if (self->nt == NULL) {
1109 PyErr_NoMemory();
1163 PyErr_NoMemory();
1110 return -1;
1164 return -1;
1111 }
1165 }
1112 self->ntlength = 1;
1166 self->ntlength = 1;
1113 self->ntrev = (int)index_length(self) - 1;
1167 self->ntrev = (int)index_length(self) - 1;
1114 self->ntlookups = 1;
1168 self->ntlookups = 1;
1115 self->ntmisses = 0;
1169 self->ntmisses = 0;
1116 if (nt_insert(self, nullid, INT_MAX) == -1)
1170 if (nt_insert(self, nullid, INT_MAX) == -1)
1117 return -1;
1171 return -1;
1118 }
1172 }
1119 return 0;
1173 return 0;
1120 }
1174 }
1121
1175
1122 /*
1176 /*
1123 * Return values:
1177 * Return values:
1124 *
1178 *
1125 * -3: error (exception set)
1179 * -3: error (exception set)
1126 * -2: not found (no exception set)
1180 * -2: not found (no exception set)
1127 * rest: valid rev
1181 * rest: valid rev
1128 */
1182 */
1129 static int index_find_node(indexObject *self,
1183 static int index_find_node(indexObject *self,
1130 const char *node, Py_ssize_t nodelen)
1184 const char *node, Py_ssize_t nodelen)
1131 {
1185 {
1132 int rev;
1186 int rev;
1133
1187
1134 self->ntlookups++;
1188 self->ntlookups++;
1135 rev = nt_find(self, node, nodelen, 0);
1189 rev = nt_find(self, node, nodelen, 0);
1136 if (rev >= -1)
1190 if (rev >= -1)
1137 return rev;
1191 return rev;
1138
1192
1139 if (nt_init(self) == -1)
1193 if (nt_init(self) == -1)
1140 return -3;
1194 return -3;
1141
1195
1142 /*
1196 /*
1143 * For the first handful of lookups, we scan the entire index,
1197 * For the first handful of lookups, we scan the entire index,
1144 * and cache only the matching nodes. This optimizes for cases
1198 * and cache only the matching nodes. This optimizes for cases
1145 * like "hg tip", where only a few nodes are accessed.
1199 * like "hg tip", where only a few nodes are accessed.
1146 *
1200 *
1147 * After that, we cache every node we visit, using a single
1201 * After that, we cache every node we visit, using a single
1148 * scan amortized over multiple lookups. This gives the best
1202 * scan amortized over multiple lookups. This gives the best
1149 * bulk performance, e.g. for "hg log".
1203 * bulk performance, e.g. for "hg log".
1150 */
1204 */
1151 if (self->ntmisses++ < 4) {
1205 if (self->ntmisses++ < 4) {
1152 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1206 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1153 const char *n = index_node(self, rev);
1207 const char *n = index_node(self, rev);
1154 if (n == NULL)
1208 if (n == NULL)
1155 return -2;
1209 return -2;
1156 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1210 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1157 if (nt_insert(self, n, rev) == -1)
1211 if (nt_insert(self, n, rev) == -1)
1158 return -3;
1212 return -3;
1159 break;
1213 break;
1160 }
1214 }
1161 }
1215 }
1162 } else {
1216 } else {
1163 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1217 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1164 const char *n = index_node(self, rev);
1218 const char *n = index_node(self, rev);
1165 if (n == NULL) {
1219 if (n == NULL) {
1166 self->ntrev = rev + 1;
1220 self->ntrev = rev + 1;
1167 return -2;
1221 return -2;
1168 }
1222 }
1169 if (nt_insert(self, n, rev) == -1) {
1223 if (nt_insert(self, n, rev) == -1) {
1170 self->ntrev = rev + 1;
1224 self->ntrev = rev + 1;
1171 return -3;
1225 return -3;
1172 }
1226 }
1173 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1227 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1174 break;
1228 break;
1175 }
1229 }
1176 }
1230 }
1177 self->ntrev = rev;
1231 self->ntrev = rev;
1178 }
1232 }
1179
1233
1180 if (rev >= 0)
1234 if (rev >= 0)
1181 return rev;
1235 return rev;
1182 return -2;
1236 return -2;
1183 }
1237 }
1184
1238
1185 static PyObject *raise_revlog_error(void)
1239 static PyObject *raise_revlog_error(void)
1186 {
1240 {
1187 static PyObject *errclass;
1241 static PyObject *errclass;
1188 PyObject *mod = NULL, *errobj;
1242 PyObject *mod = NULL, *errobj;
1189
1243
1190 if (errclass == NULL) {
1244 if (errclass == NULL) {
1191 PyObject *dict;
1245 PyObject *dict;
1192
1246
1193 mod = PyImport_ImportModule("mercurial.error");
1247 mod = PyImport_ImportModule("mercurial.error");
1194 if (mod == NULL)
1248 if (mod == NULL)
1195 goto classfail;
1249 goto classfail;
1196
1250
1197 dict = PyModule_GetDict(mod);
1251 dict = PyModule_GetDict(mod);
1198 if (dict == NULL)
1252 if (dict == NULL)
1199 goto classfail;
1253 goto classfail;
1200
1254
1201 errclass = PyDict_GetItemString(dict, "RevlogError");
1255 errclass = PyDict_GetItemString(dict, "RevlogError");
1202 if (errclass == NULL) {
1256 if (errclass == NULL) {
1203 PyErr_SetString(PyExc_SystemError,
1257 PyErr_SetString(PyExc_SystemError,
1204 "could not find RevlogError");
1258 "could not find RevlogError");
1205 goto classfail;
1259 goto classfail;
1206 }
1260 }
1207 Py_INCREF(errclass);
1261 Py_INCREF(errclass);
1208 }
1262 }
1209
1263
1210 errobj = PyObject_CallFunction(errclass, NULL);
1264 errobj = PyObject_CallFunction(errclass, NULL);
1211 if (errobj == NULL)
1265 if (errobj == NULL)
1212 return NULL;
1266 return NULL;
1213 PyErr_SetObject(errclass, errobj);
1267 PyErr_SetObject(errclass, errobj);
1214 return errobj;
1268 return errobj;
1215
1269
1216 classfail:
1270 classfail:
1217 Py_XDECREF(mod);
1271 Py_XDECREF(mod);
1218 return NULL;
1272 return NULL;
1219 }
1273 }
1220
1274
1221 static PyObject *index_getitem(indexObject *self, PyObject *value)
1275 static PyObject *index_getitem(indexObject *self, PyObject *value)
1222 {
1276 {
1223 char *node;
1277 char *node;
1224 Py_ssize_t nodelen;
1278 Py_ssize_t nodelen;
1225 int rev;
1279 int rev;
1226
1280
1227 if (PyInt_Check(value))
1281 if (PyInt_Check(value))
1228 return index_get(self, PyInt_AS_LONG(value));
1282 return index_get(self, PyInt_AS_LONG(value));
1229
1283
1230 if (node_check(value, &node, &nodelen) == -1)
1284 if (node_check(value, &node, &nodelen) == -1)
1231 return NULL;
1285 return NULL;
1232 rev = index_find_node(self, node, nodelen);
1286 rev = index_find_node(self, node, nodelen);
1233 if (rev >= -1)
1287 if (rev >= -1)
1234 return PyInt_FromLong(rev);
1288 return PyInt_FromLong(rev);
1235 if (rev == -2)
1289 if (rev == -2)
1236 raise_revlog_error();
1290 raise_revlog_error();
1237 return NULL;
1291 return NULL;
1238 }
1292 }
1239
1293
1240 static int nt_partialmatch(indexObject *self, const char *node,
1294 static int nt_partialmatch(indexObject *self, const char *node,
1241 Py_ssize_t nodelen)
1295 Py_ssize_t nodelen)
1242 {
1296 {
1243 int rev;
1297 int rev;
1244
1298
1245 if (nt_init(self) == -1)
1299 if (nt_init(self) == -1)
1246 return -3;
1300 return -3;
1247
1301
1248 if (self->ntrev > 0) {
1302 if (self->ntrev > 0) {
1249 /* ensure that the radix tree is fully populated */
1303 /* ensure that the radix tree is fully populated */
1250 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1304 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1251 const char *n = index_node(self, rev);
1305 const char *n = index_node(self, rev);
1252 if (n == NULL)
1306 if (n == NULL)
1253 return -2;
1307 return -2;
1254 if (nt_insert(self, n, rev) == -1)
1308 if (nt_insert(self, n, rev) == -1)
1255 return -3;
1309 return -3;
1256 }
1310 }
1257 self->ntrev = rev;
1311 self->ntrev = rev;
1258 }
1312 }
1259
1313
1260 return nt_find(self, node, nodelen, 1);
1314 return nt_find(self, node, nodelen, 1);
1261 }
1315 }
1262
1316
1263 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1317 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1264 {
1318 {
1265 const char *fullnode;
1319 const char *fullnode;
1266 int nodelen;
1320 int nodelen;
1267 char *node;
1321 char *node;
1268 int rev, i;
1322 int rev, i;
1269
1323
1270 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1324 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1271 return NULL;
1325 return NULL;
1272
1326
1273 if (nodelen < 4) {
1327 if (nodelen < 4) {
1274 PyErr_SetString(PyExc_ValueError, "key too short");
1328 PyErr_SetString(PyExc_ValueError, "key too short");
1275 return NULL;
1329 return NULL;
1276 }
1330 }
1277
1331
1278 if (nodelen > 40) {
1332 if (nodelen > 40) {
1279 PyErr_SetString(PyExc_ValueError, "key too long");
1333 PyErr_SetString(PyExc_ValueError, "key too long");
1280 return NULL;
1334 return NULL;
1281 }
1335 }
1282
1336
1283 for (i = 0; i < nodelen; i++)
1337 for (i = 0; i < nodelen; i++)
1284 hexdigit(node, i);
1338 hexdigit(node, i);
1285 if (PyErr_Occurred()) {
1339 if (PyErr_Occurred()) {
1286 /* input contains non-hex characters */
1340 /* input contains non-hex characters */
1287 PyErr_Clear();
1341 PyErr_Clear();
1288 Py_RETURN_NONE;
1342 Py_RETURN_NONE;
1289 }
1343 }
1290
1344
1291 rev = nt_partialmatch(self, node, nodelen);
1345 rev = nt_partialmatch(self, node, nodelen);
1292
1346
1293 switch (rev) {
1347 switch (rev) {
1294 case -4:
1348 case -4:
1295 raise_revlog_error();
1349 raise_revlog_error();
1296 case -3:
1350 case -3:
1297 return NULL;
1351 return NULL;
1298 case -2:
1352 case -2:
1299 Py_RETURN_NONE;
1353 Py_RETURN_NONE;
1300 case -1:
1354 case -1:
1301 return PyString_FromStringAndSize(nullid, 20);
1355 return PyString_FromStringAndSize(nullid, 20);
1302 }
1356 }
1303
1357
1304 fullnode = index_node(self, rev);
1358 fullnode = index_node(self, rev);
1305 if (fullnode == NULL) {
1359 if (fullnode == NULL) {
1306 PyErr_Format(PyExc_IndexError,
1360 PyErr_Format(PyExc_IndexError,
1307 "could not access rev %d", rev);
1361 "could not access rev %d", rev);
1308 return NULL;
1362 return NULL;
1309 }
1363 }
1310 return PyString_FromStringAndSize(fullnode, 20);
1364 return PyString_FromStringAndSize(fullnode, 20);
1311 }
1365 }
1312
1366
1313 static PyObject *index_m_get(indexObject *self, PyObject *args)
1367 static PyObject *index_m_get(indexObject *self, PyObject *args)
1314 {
1368 {
1315 Py_ssize_t nodelen;
1369 Py_ssize_t nodelen;
1316 PyObject *val;
1370 PyObject *val;
1317 char *node;
1371 char *node;
1318 int rev;
1372 int rev;
1319
1373
1320 if (!PyArg_ParseTuple(args, "O", &val))
1374 if (!PyArg_ParseTuple(args, "O", &val))
1321 return NULL;
1375 return NULL;
1322 if (node_check(val, &node, &nodelen) == -1)
1376 if (node_check(val, &node, &nodelen) == -1)
1323 return NULL;
1377 return NULL;
1324 rev = index_find_node(self, node, nodelen);
1378 rev = index_find_node(self, node, nodelen);
1325 if (rev == -3)
1379 if (rev == -3)
1326 return NULL;
1380 return NULL;
1327 if (rev == -2)
1381 if (rev == -2)
1328 Py_RETURN_NONE;
1382 Py_RETURN_NONE;
1329 return PyInt_FromLong(rev);
1383 return PyInt_FromLong(rev);
1330 }
1384 }
1331
1385
1332 static int index_contains(indexObject *self, PyObject *value)
1386 static int index_contains(indexObject *self, PyObject *value)
1333 {
1387 {
1334 char *node;
1388 char *node;
1335 Py_ssize_t nodelen;
1389 Py_ssize_t nodelen;
1336
1390
1337 if (PyInt_Check(value)) {
1391 if (PyInt_Check(value)) {
1338 long rev = PyInt_AS_LONG(value);
1392 long rev = PyInt_AS_LONG(value);
1339 return rev >= -1 && rev < index_length(self);
1393 return rev >= -1 && rev < index_length(self);
1340 }
1394 }
1341
1395
1342 if (node_check(value, &node, &nodelen) == -1)
1396 if (node_check(value, &node, &nodelen) == -1)
1343 return -1;
1397 return -1;
1344
1398
1345 switch (index_find_node(self, node, nodelen)) {
1399 switch (index_find_node(self, node, nodelen)) {
1346 case -3:
1400 case -3:
1347 return -1;
1401 return -1;
1348 case -2:
1402 case -2:
1349 return 0;
1403 return 0;
1350 default:
1404 default:
1351 return 1;
1405 return 1;
1352 }
1406 }
1353 }
1407 }
1354
1408
1355 static inline void index_get_parents(indexObject *self, int rev, int *ps)
1409 static inline void index_get_parents(indexObject *self, int rev, int *ps)
1356 {
1410 {
1357 if (rev >= self->length - 1) {
1411 if (rev >= self->length - 1) {
1358 PyObject *tuple = PyList_GET_ITEM(self->added,
1412 PyObject *tuple = PyList_GET_ITEM(self->added,
1359 rev - self->length + 1);
1413 rev - self->length + 1);
1360 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
1414 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
1361 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
1415 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
1362 } else {
1416 } else {
1363 const char *data = index_deref(self, rev);
1417 const char *data = index_deref(self, rev);
1364 ps[0] = getbe32(data + 24);
1418 ps[0] = getbe32(data + 24);
1365 ps[1] = getbe32(data + 28);
1419 ps[1] = getbe32(data + 28);
1366 }
1420 }
1367 }
1421 }
1368
1422
1369 typedef uint64_t bitmask;
1423 typedef uint64_t bitmask;
1370
1424
1371 /*
1425 /*
1372 * Given a disjoint set of revs, return all candidates for the
1426 * Given a disjoint set of revs, return all candidates for the
1373 * greatest common ancestor. In revset notation, this is the set
1427 * greatest common ancestor. In revset notation, this is the set
1374 * "heads(::a and ::b and ...)"
1428 * "heads(::a and ::b and ...)"
1375 */
1429 */
1376 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1430 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1377 int revcount)
1431 int revcount)
1378 {
1432 {
1379 const bitmask allseen = (1ull << revcount) - 1;
1433 const bitmask allseen = (1ull << revcount) - 1;
1380 const bitmask poison = 1ull << revcount;
1434 const bitmask poison = 1ull << revcount;
1381 PyObject *gca = PyList_New(0);
1435 PyObject *gca = PyList_New(0);
1382 int i, v, interesting;
1436 int i, v, interesting;
1383 int maxrev = -1;
1437 int maxrev = -1;
1384 bitmask sp;
1438 bitmask sp;
1385 bitmask *seen;
1439 bitmask *seen;
1386
1440
1387 if (gca == NULL)
1441 if (gca == NULL)
1388 return PyErr_NoMemory();
1442 return PyErr_NoMemory();
1389
1443
1390 for (i = 0; i < revcount; i++) {
1444 for (i = 0; i < revcount; i++) {
1391 if (revs[i] > maxrev)
1445 if (revs[i] > maxrev)
1392 maxrev = revs[i];
1446 maxrev = revs[i];
1393 }
1447 }
1394
1448
1395 seen = calloc(sizeof(*seen), maxrev + 1);
1449 seen = calloc(sizeof(*seen), maxrev + 1);
1396 if (seen == NULL) {
1450 if (seen == NULL) {
1397 Py_DECREF(gca);
1451 Py_DECREF(gca);
1398 return PyErr_NoMemory();
1452 return PyErr_NoMemory();
1399 }
1453 }
1400
1454
1401 for (i = 0; i < revcount; i++)
1455 for (i = 0; i < revcount; i++)
1402 seen[revs[i]] = 1ull << i;
1456 seen[revs[i]] = 1ull << i;
1403
1457
1404 interesting = revcount;
1458 interesting = revcount;
1405
1459
1406 for (v = maxrev; v >= 0 && interesting; v--) {
1460 for (v = maxrev; v >= 0 && interesting; v--) {
1407 bitmask sv = seen[v];
1461 bitmask sv = seen[v];
1408 int parents[2];
1462 int parents[2];
1409
1463
1410 if (!sv)
1464 if (!sv)
1411 continue;
1465 continue;
1412
1466
1413 if (sv < poison) {
1467 if (sv < poison) {
1414 interesting -= 1;
1468 interesting -= 1;
1415 if (sv == allseen) {
1469 if (sv == allseen) {
1416 PyObject *obj = PyInt_FromLong(v);
1470 PyObject *obj = PyInt_FromLong(v);
1417 if (obj == NULL)
1471 if (obj == NULL)
1418 goto bail;
1472 goto bail;
1419 if (PyList_Append(gca, obj) == -1) {
1473 if (PyList_Append(gca, obj) == -1) {
1420 Py_DECREF(obj);
1474 Py_DECREF(obj);
1421 goto bail;
1475 goto bail;
1422 }
1476 }
1423 sv |= poison;
1477 sv |= poison;
1424 for (i = 0; i < revcount; i++) {
1478 for (i = 0; i < revcount; i++) {
1425 if (revs[i] == v)
1479 if (revs[i] == v)
1426 goto done;
1480 goto done;
1427 }
1481 }
1428 }
1482 }
1429 }
1483 }
1430 index_get_parents(self, v, parents);
1484 index_get_parents(self, v, parents);
1431
1485
1432 for (i = 0; i < 2; i++) {
1486 for (i = 0; i < 2; i++) {
1433 int p = parents[i];
1487 int p = parents[i];
1434 if (p == -1)
1488 if (p == -1)
1435 continue;
1489 continue;
1436 sp = seen[p];
1490 sp = seen[p];
1437 if (sv < poison) {
1491 if (sv < poison) {
1438 if (sp == 0) {
1492 if (sp == 0) {
1439 seen[p] = sv;
1493 seen[p] = sv;
1440 interesting++;
1494 interesting++;
1441 }
1495 }
1442 else if (sp != sv)
1496 else if (sp != sv)
1443 seen[p] |= sv;
1497 seen[p] |= sv;
1444 } else {
1498 } else {
1445 if (sp && sp < poison)
1499 if (sp && sp < poison)
1446 interesting--;
1500 interesting--;
1447 seen[p] = sv;
1501 seen[p] = sv;
1448 }
1502 }
1449 }
1503 }
1450 }
1504 }
1451
1505
1452 done:
1506 done:
1453 free(seen);
1507 free(seen);
1454 return gca;
1508 return gca;
1455 bail:
1509 bail:
1456 free(seen);
1510 free(seen);
1457 Py_XDECREF(gca);
1511 Py_XDECREF(gca);
1458 return NULL;
1512 return NULL;
1459 }
1513 }
1460
1514
1461 /*
1515 /*
1462 * Given a disjoint set of revs, return the subset with the longest
1516 * Given a disjoint set of revs, return the subset with the longest
1463 * path to the root.
1517 * path to the root.
1464 */
1518 */
1465 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1519 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1466 {
1520 {
1467 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1521 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1468 static const Py_ssize_t capacity = 24;
1522 static const Py_ssize_t capacity = 24;
1469 int *depth, *interesting = NULL;
1523 int *depth, *interesting = NULL;
1470 int i, j, v, ninteresting;
1524 int i, j, v, ninteresting;
1471 PyObject *dict = NULL, *keys = NULL;
1525 PyObject *dict = NULL, *keys = NULL;
1472 long *seen = NULL;
1526 long *seen = NULL;
1473 int maxrev = -1;
1527 int maxrev = -1;
1474 long final;
1528 long final;
1475
1529
1476 if (revcount > capacity) {
1530 if (revcount > capacity) {
1477 PyErr_Format(PyExc_OverflowError,
1531 PyErr_Format(PyExc_OverflowError,
1478 "bitset size (%ld) > capacity (%ld)",
1532 "bitset size (%ld) > capacity (%ld)",
1479 (long)revcount, (long)capacity);
1533 (long)revcount, (long)capacity);
1480 return NULL;
1534 return NULL;
1481 }
1535 }
1482
1536
1483 for (i = 0; i < revcount; i++) {
1537 for (i = 0; i < revcount; i++) {
1484 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1538 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1485 if (n > maxrev)
1539 if (n > maxrev)
1486 maxrev = n;
1540 maxrev = n;
1487 }
1541 }
1488
1542
1489 depth = calloc(sizeof(*depth), maxrev + 1);
1543 depth = calloc(sizeof(*depth), maxrev + 1);
1490 if (depth == NULL)
1544 if (depth == NULL)
1491 return PyErr_NoMemory();
1545 return PyErr_NoMemory();
1492
1546
1493 seen = calloc(sizeof(*seen), maxrev + 1);
1547 seen = calloc(sizeof(*seen), maxrev + 1);
1494 if (seen == NULL) {
1548 if (seen == NULL) {
1495 PyErr_NoMemory();
1549 PyErr_NoMemory();
1496 goto bail;
1550 goto bail;
1497 }
1551 }
1498
1552
1499 interesting = calloc(sizeof(*interesting), 2 << revcount);
1553 interesting = calloc(sizeof(*interesting), 2 << revcount);
1500 if (interesting == NULL) {
1554 if (interesting == NULL) {
1501 PyErr_NoMemory();
1555 PyErr_NoMemory();
1502 goto bail;
1556 goto bail;
1503 }
1557 }
1504
1558
1505 if (PyList_Sort(revs) == -1)
1559 if (PyList_Sort(revs) == -1)
1506 goto bail;
1560 goto bail;
1507
1561
1508 for (i = 0; i < revcount; i++) {
1562 for (i = 0; i < revcount; i++) {
1509 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1563 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1510 long b = 1l << i;
1564 long b = 1l << i;
1511 depth[n] = 1;
1565 depth[n] = 1;
1512 seen[n] = b;
1566 seen[n] = b;
1513 interesting[b] = 1;
1567 interesting[b] = 1;
1514 }
1568 }
1515
1569
1516 ninteresting = (int)revcount;
1570 ninteresting = (int)revcount;
1517
1571
1518 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1572 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1519 int dv = depth[v];
1573 int dv = depth[v];
1520 int parents[2];
1574 int parents[2];
1521 long sv;
1575 long sv;
1522
1576
1523 if (dv == 0)
1577 if (dv == 0)
1524 continue;
1578 continue;
1525
1579
1526 sv = seen[v];
1580 sv = seen[v];
1527 index_get_parents(self, v, parents);
1581 index_get_parents(self, v, parents);
1528
1582
1529 for (i = 0; i < 2; i++) {
1583 for (i = 0; i < 2; i++) {
1530 int p = parents[i];
1584 int p = parents[i];
1531 long nsp, sp;
1585 long nsp, sp;
1532 int dp;
1586 int dp;
1533
1587
1534 if (p == -1)
1588 if (p == -1)
1535 continue;
1589 continue;
1536
1590
1537 dp = depth[p];
1591 dp = depth[p];
1538 nsp = sp = seen[p];
1592 nsp = sp = seen[p];
1539 if (dp <= dv) {
1593 if (dp <= dv) {
1540 depth[p] = dv + 1;
1594 depth[p] = dv + 1;
1541 if (sp != sv) {
1595 if (sp != sv) {
1542 interesting[sv] += 1;
1596 interesting[sv] += 1;
1543 nsp = seen[p] = sv;
1597 nsp = seen[p] = sv;
1544 if (sp) {
1598 if (sp) {
1545 interesting[sp] -= 1;
1599 interesting[sp] -= 1;
1546 if (interesting[sp] == 0)
1600 if (interesting[sp] == 0)
1547 ninteresting -= 1;
1601 ninteresting -= 1;
1548 }
1602 }
1549 }
1603 }
1550 }
1604 }
1551 else if (dv == dp - 1) {
1605 else if (dv == dp - 1) {
1552 nsp = sp | sv;
1606 nsp = sp | sv;
1553 if (nsp == sp)
1607 if (nsp == sp)
1554 continue;
1608 continue;
1555 seen[p] = nsp;
1609 seen[p] = nsp;
1556 interesting[sp] -= 1;
1610 interesting[sp] -= 1;
1557 if (interesting[sp] == 0 && interesting[nsp] > 0)
1611 if (interesting[sp] == 0 && interesting[nsp] > 0)
1558 ninteresting -= 1;
1612 ninteresting -= 1;
1559 interesting[nsp] += 1;
1613 interesting[nsp] += 1;
1560 }
1614 }
1561 }
1615 }
1562 interesting[sv] -= 1;
1616 interesting[sv] -= 1;
1563 if (interesting[sv] == 0)
1617 if (interesting[sv] == 0)
1564 ninteresting -= 1;
1618 ninteresting -= 1;
1565 }
1619 }
1566
1620
1567 final = 0;
1621 final = 0;
1568 j = ninteresting;
1622 j = ninteresting;
1569 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1623 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1570 if (interesting[i] == 0)
1624 if (interesting[i] == 0)
1571 continue;
1625 continue;
1572 final |= i;
1626 final |= i;
1573 j -= 1;
1627 j -= 1;
1574 }
1628 }
1575 if (final == 0) {
1629 if (final == 0) {
1576 keys = PyList_New(0);
1630 keys = PyList_New(0);
1577 goto bail;
1631 goto bail;
1578 }
1632 }
1579
1633
1580 dict = PyDict_New();
1634 dict = PyDict_New();
1581 if (dict == NULL)
1635 if (dict == NULL)
1582 goto bail;
1636 goto bail;
1583
1637
1584 for (i = 0; i < revcount; i++) {
1638 for (i = 0; i < revcount; i++) {
1585 PyObject *key;
1639 PyObject *key;
1586
1640
1587 if ((final & (1 << i)) == 0)
1641 if ((final & (1 << i)) == 0)
1588 continue;
1642 continue;
1589
1643
1590 key = PyList_GET_ITEM(revs, i);
1644 key = PyList_GET_ITEM(revs, i);
1591 Py_INCREF(key);
1645 Py_INCREF(key);
1592 Py_INCREF(Py_None);
1646 Py_INCREF(Py_None);
1593 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1647 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1594 Py_DECREF(key);
1648 Py_DECREF(key);
1595 Py_DECREF(Py_None);
1649 Py_DECREF(Py_None);
1596 goto bail;
1650 goto bail;
1597 }
1651 }
1598 }
1652 }
1599
1653
1600 keys = PyDict_Keys(dict);
1654 keys = PyDict_Keys(dict);
1601
1655
1602 bail:
1656 bail:
1603 free(depth);
1657 free(depth);
1604 free(seen);
1658 free(seen);
1605 free(interesting);
1659 free(interesting);
1606 Py_XDECREF(dict);
1660 Py_XDECREF(dict);
1607
1661
1608 return keys;
1662 return keys;
1609 }
1663 }
1610
1664
1611 /*
1665 /*
1612 * Given a (possibly overlapping) set of revs, return the greatest
1666 * Given a (possibly overlapping) set of revs, return the greatest
1613 * common ancestors: those with the longest path to the root.
1667 * common ancestors: those with the longest path to the root.
1614 */
1668 */
1615 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1669 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1616 {
1670 {
1617 PyObject *ret = NULL, *gca = NULL;
1671 PyObject *ret = NULL, *gca = NULL;
1618 Py_ssize_t argcount, i, len;
1672 Py_ssize_t argcount, i, len;
1619 bitmask repeat = 0;
1673 bitmask repeat = 0;
1620 int revcount = 0;
1674 int revcount = 0;
1621 int *revs;
1675 int *revs;
1622
1676
1623 argcount = PySequence_Length(args);
1677 argcount = PySequence_Length(args);
1624 revs = malloc(argcount * sizeof(*revs));
1678 revs = malloc(argcount * sizeof(*revs));
1625 if (argcount > 0 && revs == NULL)
1679 if (argcount > 0 && revs == NULL)
1626 return PyErr_NoMemory();
1680 return PyErr_NoMemory();
1627 len = index_length(self) - 1;
1681 len = index_length(self) - 1;
1628
1682
1629 for (i = 0; i < argcount; i++) {
1683 for (i = 0; i < argcount; i++) {
1630 static const int capacity = 24;
1684 static const int capacity = 24;
1631 PyObject *obj = PySequence_GetItem(args, i);
1685 PyObject *obj = PySequence_GetItem(args, i);
1632 bitmask x;
1686 bitmask x;
1633 long val;
1687 long val;
1634
1688
1635 if (!PyInt_Check(obj)) {
1689 if (!PyInt_Check(obj)) {
1636 PyErr_SetString(PyExc_TypeError,
1690 PyErr_SetString(PyExc_TypeError,
1637 "arguments must all be ints");
1691 "arguments must all be ints");
1638 goto bail;
1692 goto bail;
1639 }
1693 }
1640 val = PyInt_AsLong(obj);
1694 val = PyInt_AsLong(obj);
1641 if (val == -1) {
1695 if (val == -1) {
1642 ret = PyList_New(0);
1696 ret = PyList_New(0);
1643 goto done;
1697 goto done;
1644 }
1698 }
1645 if (val < 0 || val >= len) {
1699 if (val < 0 || val >= len) {
1646 PyErr_SetString(PyExc_IndexError,
1700 PyErr_SetString(PyExc_IndexError,
1647 "index out of range");
1701 "index out of range");
1648 goto bail;
1702 goto bail;
1649 }
1703 }
1650 /* this cheesy bloom filter lets us avoid some more
1704 /* this cheesy bloom filter lets us avoid some more
1651 * expensive duplicate checks in the common set-is-disjoint
1705 * expensive duplicate checks in the common set-is-disjoint
1652 * case */
1706 * case */
1653 x = 1ull << (val & 0x3f);
1707 x = 1ull << (val & 0x3f);
1654 if (repeat & x) {
1708 if (repeat & x) {
1655 int k;
1709 int k;
1656 for (k = 0; k < revcount; k++) {
1710 for (k = 0; k < revcount; k++) {
1657 if (val == revs[k])
1711 if (val == revs[k])
1658 goto duplicate;
1712 goto duplicate;
1659 }
1713 }
1660 }
1714 }
1661 else repeat |= x;
1715 else repeat |= x;
1662 if (revcount >= capacity) {
1716 if (revcount >= capacity) {
1663 PyErr_Format(PyExc_OverflowError,
1717 PyErr_Format(PyExc_OverflowError,
1664 "bitset size (%d) > capacity (%d)",
1718 "bitset size (%d) > capacity (%d)",
1665 revcount, capacity);
1719 revcount, capacity);
1666 goto bail;
1720 goto bail;
1667 }
1721 }
1668 revs[revcount++] = (int)val;
1722 revs[revcount++] = (int)val;
1669 duplicate:;
1723 duplicate:;
1670 }
1724 }
1671
1725
1672 if (revcount == 0) {
1726 if (revcount == 0) {
1673 ret = PyList_New(0);
1727 ret = PyList_New(0);
1674 goto done;
1728 goto done;
1675 }
1729 }
1676 if (revcount == 1) {
1730 if (revcount == 1) {
1677 PyObject *obj;
1731 PyObject *obj;
1678 ret = PyList_New(1);
1732 ret = PyList_New(1);
1679 if (ret == NULL)
1733 if (ret == NULL)
1680 goto bail;
1734 goto bail;
1681 obj = PyInt_FromLong(revs[0]);
1735 obj = PyInt_FromLong(revs[0]);
1682 if (obj == NULL)
1736 if (obj == NULL)
1683 goto bail;
1737 goto bail;
1684 PyList_SET_ITEM(ret, 0, obj);
1738 PyList_SET_ITEM(ret, 0, obj);
1685 goto done;
1739 goto done;
1686 }
1740 }
1687
1741
1688 gca = find_gca_candidates(self, revs, revcount);
1742 gca = find_gca_candidates(self, revs, revcount);
1689 if (gca == NULL)
1743 if (gca == NULL)
1690 goto bail;
1744 goto bail;
1691
1745
1692 if (PyList_GET_SIZE(gca) <= 1) {
1746 if (PyList_GET_SIZE(gca) <= 1) {
1693 ret = gca;
1747 ret = gca;
1694 Py_INCREF(gca);
1748 Py_INCREF(gca);
1695 }
1749 }
1696 else ret = find_deepest(self, gca);
1750 else ret = find_deepest(self, gca);
1697
1751
1698 done:
1752 done:
1699 free(revs);
1753 free(revs);
1700 Py_XDECREF(gca);
1754 Py_XDECREF(gca);
1701
1755
1702 return ret;
1756 return ret;
1703
1757
1704 bail:
1758 bail:
1705 free(revs);
1759 free(revs);
1706 Py_XDECREF(gca);
1760 Py_XDECREF(gca);
1707 Py_XDECREF(ret);
1761 Py_XDECREF(ret);
1708 return NULL;
1762 return NULL;
1709 }
1763 }
1710
1764
1711 /*
1765 /*
1712 * Given a (possibly overlapping) set of revs, return all the
1766 * Given a (possibly overlapping) set of revs, return all the
1713 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
1767 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
1714 */
1768 */
1715 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
1769 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
1716 {
1770 {
1717 PyObject *ret = NULL;
1771 PyObject *ret = NULL;
1718 Py_ssize_t argcount, i, len;
1772 Py_ssize_t argcount, i, len;
1719 bitmask repeat = 0;
1773 bitmask repeat = 0;
1720 int revcount = 0;
1774 int revcount = 0;
1721 int *revs;
1775 int *revs;
1722
1776
1723 argcount = PySequence_Length(args);
1777 argcount = PySequence_Length(args);
1724 revs = malloc(argcount * sizeof(*revs));
1778 revs = malloc(argcount * sizeof(*revs));
1725 if (argcount > 0 && revs == NULL)
1779 if (argcount > 0 && revs == NULL)
1726 return PyErr_NoMemory();
1780 return PyErr_NoMemory();
1727 len = index_length(self) - 1;
1781 len = index_length(self) - 1;
1728
1782
1729 for (i = 0; i < argcount; i++) {
1783 for (i = 0; i < argcount; i++) {
1730 static const int capacity = 24;
1784 static const int capacity = 24;
1731 PyObject *obj = PySequence_GetItem(args, i);
1785 PyObject *obj = PySequence_GetItem(args, i);
1732 bitmask x;
1786 bitmask x;
1733 long val;
1787 long val;
1734
1788
1735 if (!PyInt_Check(obj)) {
1789 if (!PyInt_Check(obj)) {
1736 PyErr_SetString(PyExc_TypeError,
1790 PyErr_SetString(PyExc_TypeError,
1737 "arguments must all be ints");
1791 "arguments must all be ints");
1738 goto bail;
1792 goto bail;
1739 }
1793 }
1740 val = PyInt_AsLong(obj);
1794 val = PyInt_AsLong(obj);
1741 if (val == -1) {
1795 if (val == -1) {
1742 ret = PyList_New(0);
1796 ret = PyList_New(0);
1743 goto done;
1797 goto done;
1744 }
1798 }
1745 if (val < 0 || val >= len) {
1799 if (val < 0 || val >= len) {
1746 PyErr_SetString(PyExc_IndexError,
1800 PyErr_SetString(PyExc_IndexError,
1747 "index out of range");
1801 "index out of range");
1748 goto bail;
1802 goto bail;
1749 }
1803 }
1750 /* this cheesy bloom filter lets us avoid some more
1804 /* this cheesy bloom filter lets us avoid some more
1751 * expensive duplicate checks in the common set-is-disjoint
1805 * expensive duplicate checks in the common set-is-disjoint
1752 * case */
1806 * case */
1753 x = 1ull << (val & 0x3f);
1807 x = 1ull << (val & 0x3f);
1754 if (repeat & x) {
1808 if (repeat & x) {
1755 int k;
1809 int k;
1756 for (k = 0; k < revcount; k++) {
1810 for (k = 0; k < revcount; k++) {
1757 if (val == revs[k])
1811 if (val == revs[k])
1758 goto duplicate;
1812 goto duplicate;
1759 }
1813 }
1760 }
1814 }
1761 else repeat |= x;
1815 else repeat |= x;
1762 if (revcount >= capacity) {
1816 if (revcount >= capacity) {
1763 PyErr_Format(PyExc_OverflowError,
1817 PyErr_Format(PyExc_OverflowError,
1764 "bitset size (%d) > capacity (%d)",
1818 "bitset size (%d) > capacity (%d)",
1765 revcount, capacity);
1819 revcount, capacity);
1766 goto bail;
1820 goto bail;
1767 }
1821 }
1768 revs[revcount++] = (int)val;
1822 revs[revcount++] = (int)val;
1769 duplicate:;
1823 duplicate:;
1770 }
1824 }
1771
1825
1772 if (revcount == 0) {
1826 if (revcount == 0) {
1773 ret = PyList_New(0);
1827 ret = PyList_New(0);
1774 goto done;
1828 goto done;
1775 }
1829 }
1776 if (revcount == 1) {
1830 if (revcount == 1) {
1777 PyObject *obj;
1831 PyObject *obj;
1778 ret = PyList_New(1);
1832 ret = PyList_New(1);
1779 if (ret == NULL)
1833 if (ret == NULL)
1780 goto bail;
1834 goto bail;
1781 obj = PyInt_FromLong(revs[0]);
1835 obj = PyInt_FromLong(revs[0]);
1782 if (obj == NULL)
1836 if (obj == NULL)
1783 goto bail;
1837 goto bail;
1784 PyList_SET_ITEM(ret, 0, obj);
1838 PyList_SET_ITEM(ret, 0, obj);
1785 goto done;
1839 goto done;
1786 }
1840 }
1787
1841
1788 ret = find_gca_candidates(self, revs, revcount);
1842 ret = find_gca_candidates(self, revs, revcount);
1789 if (ret == NULL)
1843 if (ret == NULL)
1790 goto bail;
1844 goto bail;
1791
1845
1792 done:
1846 done:
1793 free(revs);
1847 free(revs);
1794 return ret;
1848 return ret;
1795
1849
1796 bail:
1850 bail:
1797 free(revs);
1851 free(revs);
1798 Py_XDECREF(ret);
1852 Py_XDECREF(ret);
1799 return NULL;
1853 return NULL;
1800 }
1854 }
1801
1855
1802 /*
1856 /*
1803 * Invalidate any trie entries introduced by added revs.
1857 * Invalidate any trie entries introduced by added revs.
1804 */
1858 */
1805 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1859 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1806 {
1860 {
1807 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1861 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1808
1862
1809 for (i = start; i < len; i++) {
1863 for (i = start; i < len; i++) {
1810 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1864 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1811 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1865 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1812
1866
1813 nt_insert(self, PyString_AS_STRING(node), -1);
1867 nt_insert(self, PyString_AS_STRING(node), -1);
1814 }
1868 }
1815
1869
1816 if (start == 0)
1870 if (start == 0)
1817 Py_CLEAR(self->added);
1871 Py_CLEAR(self->added);
1818 }
1872 }
1819
1873
1820 /*
1874 /*
1821 * Delete a numeric range of revs, which must be at the end of the
1875 * Delete a numeric range of revs, which must be at the end of the
1822 * range, but exclude the sentinel nullid entry.
1876 * range, but exclude the sentinel nullid entry.
1823 */
1877 */
1824 static int index_slice_del(indexObject *self, PyObject *item)
1878 static int index_slice_del(indexObject *self, PyObject *item)
1825 {
1879 {
1826 Py_ssize_t start, stop, step, slicelength;
1880 Py_ssize_t start, stop, step, slicelength;
1827 Py_ssize_t length = index_length(self);
1881 Py_ssize_t length = index_length(self);
1828 int ret = 0;
1882 int ret = 0;
1829
1883
1830 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1884 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1831 &start, &stop, &step, &slicelength) < 0)
1885 &start, &stop, &step, &slicelength) < 0)
1832 return -1;
1886 return -1;
1833
1887
1834 if (slicelength <= 0)
1888 if (slicelength <= 0)
1835 return 0;
1889 return 0;
1836
1890
1837 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1891 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1838 stop = start;
1892 stop = start;
1839
1893
1840 if (step < 0) {
1894 if (step < 0) {
1841 stop = start + 1;
1895 stop = start + 1;
1842 start = stop + step*(slicelength - 1) - 1;
1896 start = stop + step*(slicelength - 1) - 1;
1843 step = -step;
1897 step = -step;
1844 }
1898 }
1845
1899
1846 if (step != 1) {
1900 if (step != 1) {
1847 PyErr_SetString(PyExc_ValueError,
1901 PyErr_SetString(PyExc_ValueError,
1848 "revlog index delete requires step size of 1");
1902 "revlog index delete requires step size of 1");
1849 return -1;
1903 return -1;
1850 }
1904 }
1851
1905
1852 if (stop != length - 1) {
1906 if (stop != length - 1) {
1853 PyErr_SetString(PyExc_IndexError,
1907 PyErr_SetString(PyExc_IndexError,
1854 "revlog index deletion indices are invalid");
1908 "revlog index deletion indices are invalid");
1855 return -1;
1909 return -1;
1856 }
1910 }
1857
1911
1858 if (start < self->length - 1) {
1912 if (start < self->length - 1) {
1859 if (self->nt) {
1913 if (self->nt) {
1860 Py_ssize_t i;
1914 Py_ssize_t i;
1861
1915
1862 for (i = start + 1; i < self->length - 1; i++) {
1916 for (i = start + 1; i < self->length - 1; i++) {
1863 const char *node = index_node(self, i);
1917 const char *node = index_node(self, i);
1864
1918
1865 if (node)
1919 if (node)
1866 nt_insert(self, node, -1);
1920 nt_insert(self, node, -1);
1867 }
1921 }
1868 if (self->added)
1922 if (self->added)
1869 nt_invalidate_added(self, 0);
1923 nt_invalidate_added(self, 0);
1870 if (self->ntrev > start)
1924 if (self->ntrev > start)
1871 self->ntrev = (int)start;
1925 self->ntrev = (int)start;
1872 }
1926 }
1873 self->length = start + 1;
1927 self->length = start + 1;
1874 if (start < self->raw_length) {
1928 if (start < self->raw_length) {
1875 if (self->cache) {
1929 if (self->cache) {
1876 Py_ssize_t i;
1930 Py_ssize_t i;
1877 for (i = start; i < self->raw_length; i++)
1931 for (i = start; i < self->raw_length; i++)
1878 Py_CLEAR(self->cache[i]);
1932 Py_CLEAR(self->cache[i]);
1879 }
1933 }
1880 self->raw_length = start;
1934 self->raw_length = start;
1881 }
1935 }
1882 goto done;
1936 goto done;
1883 }
1937 }
1884
1938
1885 if (self->nt) {
1939 if (self->nt) {
1886 nt_invalidate_added(self, start - self->length + 1);
1940 nt_invalidate_added(self, start - self->length + 1);
1887 if (self->ntrev > start)
1941 if (self->ntrev > start)
1888 self->ntrev = (int)start;
1942 self->ntrev = (int)start;
1889 }
1943 }
1890 if (self->added)
1944 if (self->added)
1891 ret = PyList_SetSlice(self->added, start - self->length + 1,
1945 ret = PyList_SetSlice(self->added, start - self->length + 1,
1892 PyList_GET_SIZE(self->added), NULL);
1946 PyList_GET_SIZE(self->added), NULL);
1893 done:
1947 done:
1894 Py_CLEAR(self->headrevs);
1948 Py_CLEAR(self->headrevs);
1895 return ret;
1949 return ret;
1896 }
1950 }
1897
1951
1898 /*
1952 /*
1899 * Supported ops:
1953 * Supported ops:
1900 *
1954 *
1901 * slice deletion
1955 * slice deletion
1902 * string assignment (extend node->rev mapping)
1956 * string assignment (extend node->rev mapping)
1903 * string deletion (shrink node->rev mapping)
1957 * string deletion (shrink node->rev mapping)
1904 */
1958 */
1905 static int index_assign_subscript(indexObject *self, PyObject *item,
1959 static int index_assign_subscript(indexObject *self, PyObject *item,
1906 PyObject *value)
1960 PyObject *value)
1907 {
1961 {
1908 char *node;
1962 char *node;
1909 Py_ssize_t nodelen;
1963 Py_ssize_t nodelen;
1910 long rev;
1964 long rev;
1911
1965
1912 if (PySlice_Check(item) && value == NULL)
1966 if (PySlice_Check(item) && value == NULL)
1913 return index_slice_del(self, item);
1967 return index_slice_del(self, item);
1914
1968
1915 if (node_check(item, &node, &nodelen) == -1)
1969 if (node_check(item, &node, &nodelen) == -1)
1916 return -1;
1970 return -1;
1917
1971
1918 if (value == NULL)
1972 if (value == NULL)
1919 return self->nt ? nt_insert(self, node, -1) : 0;
1973 return self->nt ? nt_insert(self, node, -1) : 0;
1920 rev = PyInt_AsLong(value);
1974 rev = PyInt_AsLong(value);
1921 if (rev > INT_MAX || rev < 0) {
1975 if (rev > INT_MAX || rev < 0) {
1922 if (!PyErr_Occurred())
1976 if (!PyErr_Occurred())
1923 PyErr_SetString(PyExc_ValueError, "rev out of range");
1977 PyErr_SetString(PyExc_ValueError, "rev out of range");
1924 return -1;
1978 return -1;
1925 }
1979 }
1926 return nt_insert(self, node, (int)rev);
1980 return nt_insert(self, node, (int)rev);
1927 }
1981 }
1928
1982
1929 /*
1983 /*
1930 * Find all RevlogNG entries in an index that has inline data. Update
1984 * Find all RevlogNG entries in an index that has inline data. Update
1931 * the optional "offsets" table with those entries.
1985 * the optional "offsets" table with those entries.
1932 */
1986 */
1933 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
1987 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
1934 {
1988 {
1935 const char *data = PyString_AS_STRING(self->data);
1989 const char *data = PyString_AS_STRING(self->data);
1936 Py_ssize_t pos = 0;
1990 Py_ssize_t pos = 0;
1937 Py_ssize_t end = PyString_GET_SIZE(self->data);
1991 Py_ssize_t end = PyString_GET_SIZE(self->data);
1938 long incr = v1_hdrsize;
1992 long incr = v1_hdrsize;
1939 Py_ssize_t len = 0;
1993 Py_ssize_t len = 0;
1940
1994
1941 while (pos + v1_hdrsize <= end && pos >= 0) {
1995 while (pos + v1_hdrsize <= end && pos >= 0) {
1942 uint32_t comp_len;
1996 uint32_t comp_len;
1943 /* 3rd element of header is length of compressed inline data */
1997 /* 3rd element of header is length of compressed inline data */
1944 comp_len = getbe32(data + pos + 8);
1998 comp_len = getbe32(data + pos + 8);
1945 incr = v1_hdrsize + comp_len;
1999 incr = v1_hdrsize + comp_len;
1946 if (offsets)
2000 if (offsets)
1947 offsets[len] = data + pos;
2001 offsets[len] = data + pos;
1948 len++;
2002 len++;
1949 pos += incr;
2003 pos += incr;
1950 }
2004 }
1951
2005
1952 if (pos != end) {
2006 if (pos != end) {
1953 if (!PyErr_Occurred())
2007 if (!PyErr_Occurred())
1954 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2008 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1955 return -1;
2009 return -1;
1956 }
2010 }
1957
2011
1958 return len;
2012 return len;
1959 }
2013 }
1960
2014
1961 static int index_init(indexObject *self, PyObject *args)
2015 static int index_init(indexObject *self, PyObject *args)
1962 {
2016 {
1963 PyObject *data_obj, *inlined_obj;
2017 PyObject *data_obj, *inlined_obj;
1964 Py_ssize_t size;
2018 Py_ssize_t size;
1965
2019
1966 /* Initialize before argument-checking to avoid index_dealloc() crash. */
2020 /* Initialize before argument-checking to avoid index_dealloc() crash. */
1967 self->raw_length = 0;
2021 self->raw_length = 0;
1968 self->added = NULL;
2022 self->added = NULL;
1969 self->cache = NULL;
2023 self->cache = NULL;
1970 self->data = NULL;
2024 self->data = NULL;
1971 self->headrevs = NULL;
2025 self->headrevs = NULL;
1972 self->filteredrevs = Py_None;
2026 self->filteredrevs = Py_None;
1973 Py_INCREF(Py_None);
2027 Py_INCREF(Py_None);
1974 self->nt = NULL;
2028 self->nt = NULL;
1975 self->offsets = NULL;
2029 self->offsets = NULL;
1976
2030
1977 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
2031 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1978 return -1;
2032 return -1;
1979 if (!PyString_Check(data_obj)) {
2033 if (!PyString_Check(data_obj)) {
1980 PyErr_SetString(PyExc_TypeError, "data is not a string");
2034 PyErr_SetString(PyExc_TypeError, "data is not a string");
1981 return -1;
2035 return -1;
1982 }
2036 }
1983 size = PyString_GET_SIZE(data_obj);
2037 size = PyString_GET_SIZE(data_obj);
1984
2038
1985 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2039 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1986 self->data = data_obj;
2040 self->data = data_obj;
1987
2041
1988 self->ntlength = self->ntcapacity = 0;
2042 self->ntlength = self->ntcapacity = 0;
1989 self->ntdepth = self->ntsplits = 0;
2043 self->ntdepth = self->ntsplits = 0;
1990 self->ntlookups = self->ntmisses = 0;
2044 self->ntlookups = self->ntmisses = 0;
1991 self->ntrev = -1;
2045 self->ntrev = -1;
1992 Py_INCREF(self->data);
2046 Py_INCREF(self->data);
1993
2047
1994 if (self->inlined) {
2048 if (self->inlined) {
1995 Py_ssize_t len = inline_scan(self, NULL);
2049 Py_ssize_t len = inline_scan(self, NULL);
1996 if (len == -1)
2050 if (len == -1)
1997 goto bail;
2051 goto bail;
1998 self->raw_length = len;
2052 self->raw_length = len;
1999 self->length = len + 1;
2053 self->length = len + 1;
2000 } else {
2054 } else {
2001 if (size % v1_hdrsize) {
2055 if (size % v1_hdrsize) {
2002 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2056 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2003 goto bail;
2057 goto bail;
2004 }
2058 }
2005 self->raw_length = size / v1_hdrsize;
2059 self->raw_length = size / v1_hdrsize;
2006 self->length = self->raw_length + 1;
2060 self->length = self->raw_length + 1;
2007 }
2061 }
2008
2062
2009 return 0;
2063 return 0;
2010 bail:
2064 bail:
2011 return -1;
2065 return -1;
2012 }
2066 }
2013
2067
2014 static PyObject *index_nodemap(indexObject *self)
2068 static PyObject *index_nodemap(indexObject *self)
2015 {
2069 {
2016 Py_INCREF(self);
2070 Py_INCREF(self);
2017 return (PyObject *)self;
2071 return (PyObject *)self;
2018 }
2072 }
2019
2073
2020 static void index_dealloc(indexObject *self)
2074 static void index_dealloc(indexObject *self)
2021 {
2075 {
2022 _index_clearcaches(self);
2076 _index_clearcaches(self);
2023 Py_XDECREF(self->filteredrevs);
2077 Py_XDECREF(self->filteredrevs);
2024 Py_XDECREF(self->data);
2078 Py_XDECREF(self->data);
2025 Py_XDECREF(self->added);
2079 Py_XDECREF(self->added);
2026 PyObject_Del(self);
2080 PyObject_Del(self);
2027 }
2081 }
2028
2082
2029 static PySequenceMethods index_sequence_methods = {
2083 static PySequenceMethods index_sequence_methods = {
2030 (lenfunc)index_length, /* sq_length */
2084 (lenfunc)index_length, /* sq_length */
2031 0, /* sq_concat */
2085 0, /* sq_concat */
2032 0, /* sq_repeat */
2086 0, /* sq_repeat */
2033 (ssizeargfunc)index_get, /* sq_item */
2087 (ssizeargfunc)index_get, /* sq_item */
2034 0, /* sq_slice */
2088 0, /* sq_slice */
2035 0, /* sq_ass_item */
2089 0, /* sq_ass_item */
2036 0, /* sq_ass_slice */
2090 0, /* sq_ass_slice */
2037 (objobjproc)index_contains, /* sq_contains */
2091 (objobjproc)index_contains, /* sq_contains */
2038 };
2092 };
2039
2093
2040 static PyMappingMethods index_mapping_methods = {
2094 static PyMappingMethods index_mapping_methods = {
2041 (lenfunc)index_length, /* mp_length */
2095 (lenfunc)index_length, /* mp_length */
2042 (binaryfunc)index_getitem, /* mp_subscript */
2096 (binaryfunc)index_getitem, /* mp_subscript */
2043 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2097 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2044 };
2098 };
2045
2099
2046 static PyMethodDef index_methods[] = {
2100 static PyMethodDef index_methods[] = {
2047 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2101 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2048 "return the gca set of the given revs"},
2102 "return the gca set of the given revs"},
2049 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2103 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2050 METH_VARARGS,
2104 METH_VARARGS,
2051 "return the heads of the common ancestors of the given revs"},
2105 "return the heads of the common ancestors of the given revs"},
2052 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2106 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2053 "clear the index caches"},
2107 "clear the index caches"},
2054 {"get", (PyCFunction)index_m_get, METH_VARARGS,
2108 {"get", (PyCFunction)index_m_get, METH_VARARGS,
2055 "get an index entry"},
2109 "get an index entry"},
2056 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2110 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2057 "get head revisions"},
2111 "get head revisions"},
2058 {"insert", (PyCFunction)index_insert, METH_VARARGS,
2112 {"insert", (PyCFunction)index_insert, METH_VARARGS,
2059 "insert an index entry"},
2113 "insert an index entry"},
2060 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2114 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2061 "match a potentially ambiguous node ID"},
2115 "match a potentially ambiguous node ID"},
2062 {"stats", (PyCFunction)index_stats, METH_NOARGS,
2116 {"stats", (PyCFunction)index_stats, METH_NOARGS,
2063 "stats for the index"},
2117 "stats for the index"},
2064 {NULL} /* Sentinel */
2118 {NULL} /* Sentinel */
2065 };
2119 };
2066
2120
2067 static PyGetSetDef index_getset[] = {
2121 static PyGetSetDef index_getset[] = {
2068 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2122 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2069 {NULL} /* Sentinel */
2123 {NULL} /* Sentinel */
2070 };
2124 };
2071
2125
2072 static PyTypeObject indexType = {
2126 static PyTypeObject indexType = {
2073 PyObject_HEAD_INIT(NULL)
2127 PyObject_HEAD_INIT(NULL)
2074 0, /* ob_size */
2128 0, /* ob_size */
2075 "parsers.index", /* tp_name */
2129 "parsers.index", /* tp_name */
2076 sizeof(indexObject), /* tp_basicsize */
2130 sizeof(indexObject), /* tp_basicsize */
2077 0, /* tp_itemsize */
2131 0, /* tp_itemsize */
2078 (destructor)index_dealloc, /* tp_dealloc */
2132 (destructor)index_dealloc, /* tp_dealloc */
2079 0, /* tp_print */
2133 0, /* tp_print */
2080 0, /* tp_getattr */
2134 0, /* tp_getattr */
2081 0, /* tp_setattr */
2135 0, /* tp_setattr */
2082 0, /* tp_compare */
2136 0, /* tp_compare */
2083 0, /* tp_repr */
2137 0, /* tp_repr */
2084 0, /* tp_as_number */
2138 0, /* tp_as_number */
2085 &index_sequence_methods, /* tp_as_sequence */
2139 &index_sequence_methods, /* tp_as_sequence */
2086 &index_mapping_methods, /* tp_as_mapping */
2140 &index_mapping_methods, /* tp_as_mapping */
2087 0, /* tp_hash */
2141 0, /* tp_hash */
2088 0, /* tp_call */
2142 0, /* tp_call */
2089 0, /* tp_str */
2143 0, /* tp_str */
2090 0, /* tp_getattro */
2144 0, /* tp_getattro */
2091 0, /* tp_setattro */
2145 0, /* tp_setattro */
2092 0, /* tp_as_buffer */
2146 0, /* tp_as_buffer */
2093 Py_TPFLAGS_DEFAULT, /* tp_flags */
2147 Py_TPFLAGS_DEFAULT, /* tp_flags */
2094 "revlog index", /* tp_doc */
2148 "revlog index", /* tp_doc */
2095 0, /* tp_traverse */
2149 0, /* tp_traverse */
2096 0, /* tp_clear */
2150 0, /* tp_clear */
2097 0, /* tp_richcompare */
2151 0, /* tp_richcompare */
2098 0, /* tp_weaklistoffset */
2152 0, /* tp_weaklistoffset */
2099 0, /* tp_iter */
2153 0, /* tp_iter */
2100 0, /* tp_iternext */
2154 0, /* tp_iternext */
2101 index_methods, /* tp_methods */
2155 index_methods, /* tp_methods */
2102 0, /* tp_members */
2156 0, /* tp_members */
2103 index_getset, /* tp_getset */
2157 index_getset, /* tp_getset */
2104 0, /* tp_base */
2158 0, /* tp_base */
2105 0, /* tp_dict */
2159 0, /* tp_dict */
2106 0, /* tp_descr_get */
2160 0, /* tp_descr_get */
2107 0, /* tp_descr_set */
2161 0, /* tp_descr_set */
2108 0, /* tp_dictoffset */
2162 0, /* tp_dictoffset */
2109 (initproc)index_init, /* tp_init */
2163 (initproc)index_init, /* tp_init */
2110 0, /* tp_alloc */
2164 0, /* tp_alloc */
2111 };
2165 };
2112
2166
2113 /*
2167 /*
2114 * returns a tuple of the form (index, index, cache) with elements as
2168 * returns a tuple of the form (index, index, cache) with elements as
2115 * follows:
2169 * follows:
2116 *
2170 *
2117 * index: an index object that lazily parses RevlogNG records
2171 * index: an index object that lazily parses RevlogNG records
2118 * cache: if data is inlined, a tuple (index_file_content, 0), else None
2172 * cache: if data is inlined, a tuple (index_file_content, 0), else None
2119 *
2173 *
2120 * added complications are for backwards compatibility
2174 * added complications are for backwards compatibility
2121 */
2175 */
2122 static PyObject *parse_index2(PyObject *self, PyObject *args)
2176 static PyObject *parse_index2(PyObject *self, PyObject *args)
2123 {
2177 {
2124 PyObject *tuple = NULL, *cache = NULL;
2178 PyObject *tuple = NULL, *cache = NULL;
2125 indexObject *idx;
2179 indexObject *idx;
2126 int ret;
2180 int ret;
2127
2181
2128 idx = PyObject_New(indexObject, &indexType);
2182 idx = PyObject_New(indexObject, &indexType);
2129 if (idx == NULL)
2183 if (idx == NULL)
2130 goto bail;
2184 goto bail;
2131
2185
2132 ret = index_init(idx, args);
2186 ret = index_init(idx, args);
2133 if (ret == -1)
2187 if (ret == -1)
2134 goto bail;
2188 goto bail;
2135
2189
2136 if (idx->inlined) {
2190 if (idx->inlined) {
2137 cache = Py_BuildValue("iO", 0, idx->data);
2191 cache = Py_BuildValue("iO", 0, idx->data);
2138 if (cache == NULL)
2192 if (cache == NULL)
2139 goto bail;
2193 goto bail;
2140 } else {
2194 } else {
2141 cache = Py_None;
2195 cache = Py_None;
2142 Py_INCREF(cache);
2196 Py_INCREF(cache);
2143 }
2197 }
2144
2198
2145 tuple = Py_BuildValue("NN", idx, cache);
2199 tuple = Py_BuildValue("NN", idx, cache);
2146 if (!tuple)
2200 if (!tuple)
2147 goto bail;
2201 goto bail;
2148 return tuple;
2202 return tuple;
2149
2203
2150 bail:
2204 bail:
2151 Py_XDECREF(idx);
2205 Py_XDECREF(idx);
2152 Py_XDECREF(cache);
2206 Py_XDECREF(cache);
2153 Py_XDECREF(tuple);
2207 Py_XDECREF(tuple);
2154 return NULL;
2208 return NULL;
2155 }
2209 }
2156
2210
2157 static char parsers_doc[] = "Efficient content parsing.";
2211 static char parsers_doc[] = "Efficient content parsing.";
2158
2212
2159 PyObject *encodedir(PyObject *self, PyObject *args);
2213 PyObject *encodedir(PyObject *self, PyObject *args);
2160 PyObject *pathencode(PyObject *self, PyObject *args);
2214 PyObject *pathencode(PyObject *self, PyObject *args);
2161 PyObject *lowerencode(PyObject *self, PyObject *args);
2215 PyObject *lowerencode(PyObject *self, PyObject *args);
2162
2216
2163 static PyMethodDef methods[] = {
2217 static PyMethodDef methods[] = {
2164 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
2218 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
2165 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
2219 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
2166 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
2220 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
2167 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
2221 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
2222 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
2168 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
2223 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
2169 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
2224 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
2170 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
2225 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
2171 {NULL, NULL}
2226 {NULL, NULL}
2172 };
2227 };
2173
2228
2174 void dirs_module_init(PyObject *mod);
2229 void dirs_module_init(PyObject *mod);
2175
2230
2176 static void module_init(PyObject *mod)
2231 static void module_init(PyObject *mod)
2177 {
2232 {
2178 /* This module constant has two purposes. First, it lets us unit test
2233 /* This module constant has two purposes. First, it lets us unit test
2179 * the ImportError raised without hard-coding any error text. This
2234 * the ImportError raised without hard-coding any error text. This
2180 * means we can change the text in the future without breaking tests,
2235 * means we can change the text in the future without breaking tests,
2181 * even across changesets without a recompile. Second, its presence
2236 * even across changesets without a recompile. Second, its presence
2182 * can be used to determine whether the version-checking logic is
2237 * can be used to determine whether the version-checking logic is
2183 * present, which also helps in testing across changesets without a
2238 * present, which also helps in testing across changesets without a
2184 * recompile. Note that this means the pure-Python version of parsers
2239 * recompile. Note that this means the pure-Python version of parsers
2185 * should not have this module constant. */
2240 * should not have this module constant. */
2186 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
2241 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
2187
2242
2188 dirs_module_init(mod);
2243 dirs_module_init(mod);
2189
2244
2190 indexType.tp_new = PyType_GenericNew;
2245 indexType.tp_new = PyType_GenericNew;
2191 if (PyType_Ready(&indexType) < 0 ||
2246 if (PyType_Ready(&indexType) < 0 ||
2192 PyType_Ready(&dirstateTupleType) < 0)
2247 PyType_Ready(&dirstateTupleType) < 0)
2193 return;
2248 return;
2194 Py_INCREF(&indexType);
2249 Py_INCREF(&indexType);
2195 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
2250 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
2196 Py_INCREF(&dirstateTupleType);
2251 Py_INCREF(&dirstateTupleType);
2197 PyModule_AddObject(mod, "dirstatetuple",
2252 PyModule_AddObject(mod, "dirstatetuple",
2198 (PyObject *)&dirstateTupleType);
2253 (PyObject *)&dirstateTupleType);
2199
2254
2200 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
2255 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
2201 -1, -1, -1, -1, nullid, 20);
2256 -1, -1, -1, -1, nullid, 20);
2202 if (nullentry)
2257 if (nullentry)
2203 PyObject_GC_UnTrack(nullentry);
2258 PyObject_GC_UnTrack(nullentry);
2204 }
2259 }
2205
2260
2206 static int check_python_version(void)
2261 static int check_python_version(void)
2207 {
2262 {
2208 PyObject *sys = PyImport_ImportModule("sys");
2263 PyObject *sys = PyImport_ImportModule("sys");
2209 long hexversion = PyInt_AsLong(PyObject_GetAttrString(sys, "hexversion"));
2264 long hexversion = PyInt_AsLong(PyObject_GetAttrString(sys, "hexversion"));
2210 /* sys.hexversion is a 32-bit number by default, so the -1 case
2265 /* sys.hexversion is a 32-bit number by default, so the -1 case
2211 * should only occur in unusual circumstances (e.g. if sys.hexversion
2266 * should only occur in unusual circumstances (e.g. if sys.hexversion
2212 * is manually set to an invalid value). */
2267 * is manually set to an invalid value). */
2213 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
2268 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
2214 PyErr_Format(PyExc_ImportError, "%s: The Mercurial extension "
2269 PyErr_Format(PyExc_ImportError, "%s: The Mercurial extension "
2215 "modules were compiled with Python " PY_VERSION ", but "
2270 "modules were compiled with Python " PY_VERSION ", but "
2216 "Mercurial is currently using Python with sys.hexversion=%ld: "
2271 "Mercurial is currently using Python with sys.hexversion=%ld: "
2217 "Python %s\n at: %s", versionerrortext, hexversion,
2272 "Python %s\n at: %s", versionerrortext, hexversion,
2218 Py_GetVersion(), Py_GetProgramFullPath());
2273 Py_GetVersion(), Py_GetProgramFullPath());
2219 return -1;
2274 return -1;
2220 }
2275 }
2221 return 0;
2276 return 0;
2222 }
2277 }
2223
2278
2224 #ifdef IS_PY3K
2279 #ifdef IS_PY3K
2225 static struct PyModuleDef parsers_module = {
2280 static struct PyModuleDef parsers_module = {
2226 PyModuleDef_HEAD_INIT,
2281 PyModuleDef_HEAD_INIT,
2227 "parsers",
2282 "parsers",
2228 parsers_doc,
2283 parsers_doc,
2229 -1,
2284 -1,
2230 methods
2285 methods
2231 };
2286 };
2232
2287
2233 PyMODINIT_FUNC PyInit_parsers(void)
2288 PyMODINIT_FUNC PyInit_parsers(void)
2234 {
2289 {
2235 PyObject *mod;
2290 PyObject *mod;
2236
2291
2237 if (check_python_version() == -1)
2292 if (check_python_version() == -1)
2238 return;
2293 return;
2239 mod = PyModule_Create(&parsers_module);
2294 mod = PyModule_Create(&parsers_module);
2240 module_init(mod);
2295 module_init(mod);
2241 return mod;
2296 return mod;
2242 }
2297 }
2243 #else
2298 #else
2244 PyMODINIT_FUNC initparsers(void)
2299 PyMODINIT_FUNC initparsers(void)
2245 {
2300 {
2246 PyObject *mod;
2301 PyObject *mod;
2247
2302
2248 if (check_python_version() == -1)
2303 if (check_python_version() == -1)
2249 return;
2304 return;
2250 mod = Py_InitModule3("parsers", methods, parsers_doc);
2305 mod = Py_InitModule3("parsers", methods, parsers_doc);
2251 module_init(mod);
2306 module_init(mod);
2252 }
2307 }
2253 #endif
2308 #endif
General Comments 0
You need to be logged in to leave comments. Login now